PR c++/68475
[official-gcc.git] / gcc / hsa-brig.c
blob018b7be12fb6d0348a79e57f7e716dad0bc910a5
1 /* Producing binary form of HSA BRIG from our internal representation.
2 Copyright (C) 2013-2016 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "target.h"
27 #include "tm_p.h"
28 #include "is-a.h"
29 #include "vec.h"
30 #include "hash-table.h"
31 #include "hash-map.h"
32 #include "tree.h"
33 #include "tree-iterator.h"
34 #include "stor-layout.h"
35 #include "output.h"
36 #include "cfg.h"
37 #include "function.h"
38 #include "fold-const.h"
39 #include "stringpool.h"
40 #include "gimple-pretty-print.h"
41 #include "diagnostic-core.h"
42 #include "cgraph.h"
43 #include "dumpfile.h"
44 #include "print-tree.h"
45 #include "symbol-summary.h"
46 #include "hsa.h"
47 #include "gomp-constants.h"
/* Return the 16-bit value VAL in little-endian byte order.  Only hosts known
   to be big-endian need a swap; little-endian and PDP-endian hosts already
   store 16-bit quantities the right way round.  */

static uint16_t
lendian16 (uint16_t val)
{
#if GCC_VERSION >= 4006 && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap16 (val);
#elif GCC_VERSION < 4006 && defined (WORDS_BIGENDIAN)
  /* No builtin available: swap the two bytes with shifts.  */
  return (val >> 8) | (val << 8);
#else
  return val;
#endif
}
/* Return the 32-bit value VAL in little-endian byte order.  */

static uint32_t
lendian32 (uint32_t val)
{
#if GCC_VERSION >= 4006 && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif GCC_VERSION >= 4006 && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap32 (val);
#elif GCC_VERSION >= 4006
  /* __ORDER_PDP_ENDIAN__: only the two 16-bit halves are exchanged.  */
  return (val >> 16) | (val << 16);
#elif !defined (WORDS_BIGENDIAN)
  return val;
#else
  /* Safe slower default: swap bytes within each half, then the halves.  */
  uint32_t bytes_swapped = ((val >> 8) & 0x00ff00ff) | ((val & 0x00ff00ff) << 8);
  return (bytes_swapped >> 16) | (bytes_swapped << 16);
#endif
}
/* Return the 64-bit value VAL in little-endian byte order.  */

static uint64_t
lendian64 (uint64_t val)
{
#if GCC_VERSION >= 4006 && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif GCC_VERSION >= 4006 && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap64 (val);
#elif GCC_VERSION >= 4006
  /* __ORDER_PDP_ENDIAN__: reverse the order of the four 16-bit words.  */
  uint64_t word0 = (val & 0xffffull) << 48;
  uint64_t word1 = (val & 0xffff0000ull) << 16;
  uint64_t word2 = (val & 0xffff00000000ull) >> 16;
  uint64_t word3 = (val & 0xffff000000000000ull) >> 48;
  return word0 | word1 | word2 | word3;
#elif !defined (WORDS_BIGENDIAN)
  return val;
#else
  /* Safe slower default: swap bytes, then 16-bit words, then 32-bit
     halves.  */
  val = (((val >> 8) & 0x00ff00ff00ff00ffull)
         | ((val & 0x00ff00ff00ff00ffull) << 8));
  val = (((val >> 16) & 0x0000ffff0000ffffull)
         | ((val & 0x0000ffff0000ffffull) << 16));
  return (val >> 32) | (val << 32);
#endif
}
/* Names used for the ELF section, the label and the three BRIG sections.  */

#define BRIG_ELF_SECTION_NAME     ".brig"
#define BRIG_LABEL_STRING         "hsa_brig"
#define BRIG_SECTION_DATA_NAME    "hsa_data"
#define BRIG_SECTION_CODE_NAME    "hsa_code"
#define BRIG_SECTION_OPERAND_NAME "hsa_operand"

/* Capacity in bytes of a single hsa_brig_data_chunk buffer.  */

#define BRIG_CHUNK_MAX_SIZE (64 * 1024)

/* Required HSA section alignment.  */

#define HSA_SECTION_ALIGNMENT 16
/* One buffer of BRIG binary data; sections are made of a vector of these.  */

struct hsa_brig_data_chunk
{
  /* Number of bytes of DATA that are already in use.  */
  unsigned size;

  /* Start of the buffer.  */
  char *data;
};
149 /* Structure representing a BRIG section, holding and writing its data. */
151 class hsa_brig_section
153 public:
154 /* Section name that will be output to the BRIG. */
155 const char *section_name;
156 /* Size in bytes of all data stored in the section. */
157 unsigned total_size;
158 /* The size of the header of the section including padding. */
159 unsigned header_byte_count;
160 /* The size of the header of the section without any padding. */
161 unsigned header_byte_delta;
163 /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */
164 vec <struct hsa_brig_data_chunk> chunks;
166 /* More convenient access to the last chunk from the vector above. */
167 struct hsa_brig_data_chunk *cur_chunk;
169 void allocate_new_chunk ();
170 void init (const char *name);
171 void release ();
172 void output ();
173 unsigned add (const void *data, unsigned len);
174 void round_size_up (int factor);
175 void *get_ptr_by_offset (unsigned int offset);
178 static struct hsa_brig_section brig_data, brig_code, brig_operand;
179 static uint32_t brig_insn_count;
180 static bool brig_initialized = false;
182 /* Mapping between emitted HSA functions and their offset in code segment. */
183 static hash_map<tree, BrigCodeOffset32_t> *function_offsets;
185 /* Hash map of emitted function declarations. */
186 static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations;
188 /* Hash table of emitted internal function declaration offsets. */
189 hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls;
191 /* List of sbr instructions. */
192 static vec <hsa_insn_sbr *> *switch_instructions;
194 struct function_linkage_pair
196 function_linkage_pair (tree decl, unsigned int off)
197 : function_decl (decl), offset (off) {}
199 /* Declaration of called function. */
200 tree function_decl;
202 /* Offset in operand section. */
203 unsigned int offset;
206 /* Vector of function calls where we need to resolve function offsets. */
207 static auto_vec <function_linkage_pair> function_call_linkage;
209 /* Add a new chunk, allocate data for it and initialize it. */
211 void
212 hsa_brig_section::allocate_new_chunk ()
214 struct hsa_brig_data_chunk new_chunk;
216 new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE);
217 new_chunk.size = 0;
218 cur_chunk = chunks.safe_push (new_chunk);
221 /* Initialize the brig section. */
223 void
224 hsa_brig_section::init (const char *name)
226 section_name = name;
227 /* While the following computation is basically wrong, because the intent
228 certainly wasn't to have the first character of name and padding, which
229 are a part of sizeof (BrigSectionHeader), included in the first addend,
230 this is what the disassembler expects. */
231 total_size = sizeof (BrigSectionHeader) + strlen (section_name);
232 chunks.create (1);
233 allocate_new_chunk ();
234 header_byte_delta = total_size;
235 round_size_up (4);
236 header_byte_count = total_size;
239 /* Free all data in the section. */
241 void
242 hsa_brig_section::release ()
244 for (unsigned i = 0; i < chunks.length (); i++)
245 free (chunks[i].data);
246 chunks.release ();
247 cur_chunk = NULL;
250 /* Write the section to the output file to a section with the name given at
251 initialization. Switches the output section and does not restore it. */
253 void
254 hsa_brig_section::output ()
256 struct BrigSectionHeader section_header;
257 char padding[8];
259 section_header.byteCount = lendian64 (total_size);
260 section_header.headerByteCount = lendian32 (header_byte_count);
261 section_header.nameLength = lendian32 (strlen (section_name));
262 assemble_string ((const char *) &section_header, 16);
263 assemble_string (section_name, (section_header.nameLength));
264 memset (&padding, 0, sizeof (padding));
265 /* This is also a consequence of the wrong header size computation described
266 in a comment in hsa_brig_section::init. */
267 assemble_string (padding, 8);
268 for (unsigned i = 0; i < chunks.length (); i++)
269 assemble_string (chunks[i].data, chunks[i].size);
272 /* Add to the stream LEN bytes of opaque binary DATA. Return the offset at
273 which it was stored. */
275 unsigned
276 hsa_brig_section::add (const void *data, unsigned len)
278 unsigned offset = total_size;
280 gcc_assert (len <= BRIG_CHUNK_MAX_SIZE);
281 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len))
282 allocate_new_chunk ();
284 memcpy (cur_chunk->data + cur_chunk->size, data, len);
285 cur_chunk->size += len;
286 total_size += len;
288 return offset;
291 /* Add padding to section so that its size is divisible by FACTOR. */
293 void
294 hsa_brig_section::round_size_up (int factor)
296 unsigned padding, res = total_size % factor;
298 if (res == 0)
299 return;
301 padding = factor - res;
302 total_size += padding;
303 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding))
305 padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size;
306 cur_chunk->size = BRIG_CHUNK_MAX_SIZE;
307 allocate_new_chunk ();
310 cur_chunk->size += padding;
313 /* Return pointer to data by global OFFSET in the section. */
315 void *
316 hsa_brig_section::get_ptr_by_offset (unsigned int offset)
318 gcc_assert (offset < total_size);
319 offset -= header_byte_delta;
321 unsigned i;
322 for (i = 0; offset >= chunks[i].size; i++)
323 offset -= chunks[i].size;
325 return chunks[i].data + offset;
/* Entry of the table of strings already emitted to the BRIG data section.  */

struct brig_string_slot
{
  /* The string itself; LEN bytes are hashed and compared.  */
  const char *s;

  /* Prefix character emitted before S, or zero if there is none.  */
  char prefix;

  /* Length of S in bytes.  */
  int len;

  /* Offset in the data section at which the string was emitted.  */
  uint32_t offset;
};
338 /* Hash table helpers. */
340 struct brig_string_slot_hasher : pointer_hash <brig_string_slot>
342 static inline hashval_t hash (const value_type);
343 static inline bool equal (const value_type, const compare_type);
344 static inline void remove (value_type);
347 /* Returns a hash code for DS. Adapted from libiberty's htab_hash_string
348 to support strings that may not end in '\0'. */
350 inline hashval_t
351 brig_string_slot_hasher::hash (const value_type ds)
353 hashval_t r = ds->len;
354 int i;
356 for (i = 0; i < ds->len; i++)
357 r = r * 67 + (unsigned) ds->s[i] - 113;
358 r = r * 67 + (unsigned) ds->prefix - 113;
359 return r;
362 /* Returns nonzero if DS1 and DS2 are equal. */
364 inline bool
365 brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2)
367 if (ds1->len == ds2->len)
368 return ds1->prefix == ds2->prefix
369 && memcmp (ds1->s, ds2->s, ds1->len) == 0;
371 return 0;
374 /* Deallocate memory for DS upon its removal. */
376 inline void
377 brig_string_slot_hasher::remove (value_type ds)
379 free (const_cast<char *> (ds->s));
380 free (ds);
383 /* Hash for strings we output in order not to duplicate them needlessly. */
385 static hash_table<brig_string_slot_hasher> *brig_string_htab;
387 /* Emit a null terminated string STR to the data section and return its
388 offset in it. If PREFIX is non-zero, output it just before STR too.
389 Sanitize the string if SANITIZE option is set to true. */
391 static unsigned
392 brig_emit_string (const char *str, char prefix = 0, bool sanitize = true)
394 unsigned slen = strlen (str);
395 unsigned offset, len = slen + (prefix ? 1 : 0);
396 uint32_t hdr_len = lendian32 (len);
397 brig_string_slot s_slot;
398 brig_string_slot **slot;
399 char *str2;
401 str2 = xstrdup (str);
403 if (sanitize)
404 hsa_sanitize_name (str2);
405 s_slot.s = str2;
406 s_slot.len = slen;
407 s_slot.prefix = prefix;
408 s_slot.offset = 0;
410 slot = brig_string_htab->find_slot (&s_slot, INSERT);
411 if (*slot == NULL)
413 brig_string_slot *new_slot = XCNEW (brig_string_slot);
415 /* In theory we should fill in BrigData but that would mean copying
416 the string to a buffer for no reason, so we just emulate it. */
417 offset = brig_data.add (&hdr_len, sizeof (hdr_len));
418 if (prefix)
419 brig_data.add (&prefix, 1);
421 brig_data.add (str2, slen);
422 brig_data.round_size_up (4);
424 /* TODO: could use the string we just copied into
425 brig_string->cur_chunk */
426 new_slot->s = str2;
427 new_slot->len = slen;
428 new_slot->prefix = prefix;
429 new_slot->offset = offset;
430 *slot = new_slot;
432 else
434 offset = (*slot)->offset;
435 free (str2);
438 return offset;
441 /* Linked list of queued operands. */
443 static struct operand_queue
445 /* First from the chain of queued operands. */
446 hsa_op_base *first_op, *last_op;
448 /* The offset at which the next operand will be enqueued. */
449 unsigned projected_size;
451 } op_queue;
453 /* Unless already initialized, initialize infrastructure to produce BRIG. */
455 static void
456 brig_init (void)
458 brig_insn_count = 0;
460 if (brig_initialized)
461 return;
463 brig_string_htab = new hash_table<brig_string_slot_hasher> (37);
464 brig_data.init (BRIG_SECTION_DATA_NAME);
465 brig_code.init (BRIG_SECTION_CODE_NAME);
466 brig_operand.init (BRIG_SECTION_OPERAND_NAME);
467 brig_initialized = true;
469 struct BrigDirectiveModule moddir;
470 memset (&moddir, 0, sizeof (moddir));
471 moddir.base.byteCount = lendian16 (sizeof (moddir));
473 char *modname;
474 if (main_input_filename && *main_input_filename != '\0')
476 const char *part = strrchr (main_input_filename, '/');
477 if (!part)
478 part = main_input_filename;
479 else
480 part++;
481 modname = concat ("&__hsa_module_", part, NULL);
482 char *extension = strchr (modname, '.');
483 if (extension)
484 *extension = '\0';
486 /* As in LTO mode, we have to emit a different module names. */
487 if (flag_ltrans)
489 part = strrchr (asm_file_name, '/');
490 if (!part)
491 part = asm_file_name;
492 else
493 part++;
494 char *modname2;
495 asprintf (&modname2, "%s_%s", modname, part);
496 free (modname);
497 modname = modname2;
500 hsa_sanitize_name (modname);
501 moddir.name = brig_emit_string (modname);
502 free (modname);
504 else
505 moddir.name = brig_emit_string ("__hsa_module_unnamed", '&');
506 moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE);
507 moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR);
508 moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR);
509 moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
510 if (hsa_machine_large_p ())
511 moddir.machineModel = BRIG_MACHINE_LARGE;
512 else
513 moddir.machineModel = BRIG_MACHINE_SMALL;
514 moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
515 brig_code.add (&moddir, sizeof (moddir));
518 /* Free all BRIG data. */
520 static void
521 brig_release_data (void)
523 delete brig_string_htab;
524 brig_data.release ();
525 brig_code.release ();
526 brig_operand.release ();
528 brig_initialized = 0;
531 /* Enqueue operation OP. Return the offset at which it will be stored. */
533 static unsigned int
534 enqueue_op (hsa_op_base *op)
536 unsigned ret;
538 if (op->m_brig_op_offset)
539 return op->m_brig_op_offset;
541 ret = op_queue.projected_size;
542 op->m_brig_op_offset = op_queue.projected_size;
544 if (!op_queue.first_op)
545 op_queue.first_op = op;
546 else
547 op_queue.last_op->m_next = op;
548 op_queue.last_op = op;
550 if (is_a <hsa_op_immed *> (op))
551 op_queue.projected_size += sizeof (struct BrigOperandConstantBytes);
552 else if (is_a <hsa_op_reg *> (op))
553 op_queue.projected_size += sizeof (struct BrigOperandRegister);
554 else if (is_a <hsa_op_address *> (op))
555 op_queue.projected_size += sizeof (struct BrigOperandAddress);
556 else if (is_a <hsa_op_code_ref *> (op))
557 op_queue.projected_size += sizeof (struct BrigOperandCodeRef);
558 else if (is_a <hsa_op_code_list *> (op))
559 op_queue.projected_size += sizeof (struct BrigOperandCodeList);
560 else if (is_a <hsa_op_operand_list *> (op))
561 op_queue.projected_size += sizeof (struct BrigOperandOperandList);
562 else
563 gcc_unreachable ();
564 return ret;
568 /* Emit directive describing a symbol if it has not been emitted already.
569 Return the offset of the directive. */
571 static unsigned
572 emit_directive_variable (struct hsa_symbol *symbol)
574 struct BrigDirectiveVariable dirvar;
575 unsigned name_offset;
576 static unsigned res_name_offset;
578 if (symbol->m_directive_offset)
579 return symbol->m_directive_offset;
581 memset (&dirvar, 0, sizeof (dirvar));
582 dirvar.base.byteCount = lendian16 (sizeof (dirvar));
583 dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE);
584 dirvar.allocation = symbol->m_allocation;
586 char prefix = symbol->m_global_scope_p ? '&' : '%';
588 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL)
590 if (res_name_offset == 0)
591 res_name_offset = brig_emit_string (symbol->m_name, '%');
592 name_offset = res_name_offset;
594 else if (symbol->m_name)
595 name_offset = brig_emit_string (symbol->m_name, prefix);
596 else
598 char buf[64];
599 snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment),
600 symbol->m_name_number);
601 name_offset = brig_emit_string (buf, prefix);
604 dirvar.name = lendian32 (name_offset);
605 dirvar.init = 0;
606 dirvar.type = lendian16 (symbol->m_type);
607 dirvar.segment = symbol->m_segment;
608 dirvar.align = symbol->m_align;
609 dirvar.linkage = symbol->m_linkage;
610 dirvar.dim.lo = symbol->m_dim;
611 dirvar.dim.hi = symbol->m_dim >> 32;
613 /* Global variables are just declared and linked via HSA runtime. */
614 if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM)
615 dirvar.modifier |= BRIG_VARIABLE_DEFINITION;
616 dirvar.reserved = 0;
618 if (symbol->m_cst_value)
620 dirvar.modifier |= BRIG_VARIABLE_CONST;
621 dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value));
624 symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar));
625 return symbol->m_directive_offset;
628 /* Emit directives describing either a function declaration or
629 definition F. */
631 static BrigDirectiveExecutable *
632 emit_function_directives (hsa_function_representation *f, bool is_declaration)
634 struct BrigDirectiveExecutable fndir;
635 unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
636 int count = 0;
637 BrigDirectiveExecutable *ptr_to_fndir;
638 hsa_symbol *sym;
640 if (!f->m_declaration_p)
641 for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++)
643 gcc_assert (!sym->m_emitted_to_brig);
644 sym->m_emitted_to_brig = true;
645 emit_directive_variable (sym);
646 brig_insn_count++;
649 name_offset = brig_emit_string (f->m_name, '&');
650 inarg_off = brig_code.total_size + sizeof (fndir)
651 + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
652 scoped_off = inarg_off
653 + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable);
655 if (!f->m_declaration_p)
657 count += f->m_spill_symbols.length ();
658 count += f->m_private_variables.length ();
661 next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable);
663 memset (&fndir, 0, sizeof (fndir));
664 fndir.base.byteCount = lendian16 (sizeof (fndir));
665 fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
666 : BRIG_KIND_DIRECTIVE_FUNCTION);
667 fndir.name = lendian32 (name_offset);
668 fndir.inArgCount = lendian16 (f->m_input_args.length ());
669 fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0);
670 fndir.firstInArg = lendian32 (inarg_off);
671 fndir.firstCodeBlockEntry = lendian32 (scoped_off);
672 fndir.nextModuleEntry = lendian32 (next_toplev_off);
673 fndir.linkage = f->get_linkage ();
674 if (!f->m_declaration_p)
675 fndir.modifier |= BRIG_EXECUTABLE_DEFINITION;
676 memset (&fndir.reserved, 0, sizeof (fndir.reserved));
678 /* Once we put a definition of function_offsets, we should not overwrite
679 it with a declaration of the function. */
680 if (f->m_internal_fn == NULL)
682 if (!function_offsets->get (f->m_decl) || !is_declaration)
683 function_offsets->put (f->m_decl, brig_code.total_size);
685 else
687 /* Internal function. */
688 hsa_internal_fn **slot
689 = hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT);
690 hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn);
691 int_fn->m_offset = brig_code.total_size;
692 *slot = int_fn;
695 brig_code.add (&fndir, sizeof (fndir));
696 /* terrible hack: we need to set instCount after we emit all
697 insns, but we need to emit directive in order, and we emit directives
698 during insn emitting. So we need to emit the FUNCTION directive
699 early, then the insns, and then we need to set instCount, so remember
700 a pointer to it, in some horrible way. cur_chunk.data+size points
701 directly to after fndir here. */
702 ptr_to_fndir
703 = (BrigDirectiveExecutable *)(brig_code.cur_chunk->data
704 + brig_code.cur_chunk->size
705 - sizeof (fndir));
707 if (f->m_output_arg)
708 emit_directive_variable (f->m_output_arg);
709 for (unsigned i = 0; i < f->m_input_args.length (); i++)
710 emit_directive_variable (f->m_input_args[i]);
712 if (!f->m_declaration_p)
714 for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++)
716 emit_directive_variable (sym);
717 brig_insn_count++;
719 for (unsigned i = 0; i < f->m_private_variables.length (); i++)
721 emit_directive_variable (f->m_private_variables[i]);
722 brig_insn_count++;
726 return ptr_to_fndir;
729 /* Emit a label directive for the given HBB. We assume it is about to start on
730 the current offset in the code section. */
732 static void
733 emit_bb_label_directive (hsa_bb *hbb)
735 struct BrigDirectiveLabel lbldir;
737 lbldir.base.byteCount = lendian16 (sizeof (lbldir));
738 lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL);
739 char buf[32];
740 snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl),
741 hbb->m_index);
742 lbldir.name = lendian32 (brig_emit_string (buf, '@'));
744 hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir,
745 sizeof (lbldir));
746 brig_insn_count++;
749 /* Map a normal HSAIL type to the type of the equivalent BRIG operand
750 holding such, for constants and registers. */
752 static BrigType16_t
753 regtype_for_type (BrigType16_t t)
755 switch (t)
757 case BRIG_TYPE_B1:
758 return BRIG_TYPE_B1;
760 case BRIG_TYPE_U8:
761 case BRIG_TYPE_U16:
762 case BRIG_TYPE_U32:
763 case BRIG_TYPE_S8:
764 case BRIG_TYPE_S16:
765 case BRIG_TYPE_S32:
766 case BRIG_TYPE_B8:
767 case BRIG_TYPE_B16:
768 case BRIG_TYPE_B32:
769 case BRIG_TYPE_F16:
770 case BRIG_TYPE_F32:
771 case BRIG_TYPE_U8X4:
772 case BRIG_TYPE_U16X2:
773 case BRIG_TYPE_S8X4:
774 case BRIG_TYPE_S16X2:
775 case BRIG_TYPE_F16X2:
776 return BRIG_TYPE_B32;
778 case BRIG_TYPE_U64:
779 case BRIG_TYPE_S64:
780 case BRIG_TYPE_F64:
781 case BRIG_TYPE_B64:
782 case BRIG_TYPE_U8X8:
783 case BRIG_TYPE_U16X4:
784 case BRIG_TYPE_U32X2:
785 case BRIG_TYPE_S8X8:
786 case BRIG_TYPE_S16X4:
787 case BRIG_TYPE_S32X2:
788 case BRIG_TYPE_F16X4:
789 case BRIG_TYPE_F32X2:
790 return BRIG_TYPE_B64;
792 case BRIG_TYPE_B128:
793 case BRIG_TYPE_U8X16:
794 case BRIG_TYPE_U16X8:
795 case BRIG_TYPE_U32X4:
796 case BRIG_TYPE_U64X2:
797 case BRIG_TYPE_S8X16:
798 case BRIG_TYPE_S16X8:
799 case BRIG_TYPE_S32X4:
800 case BRIG_TYPE_S64X2:
801 case BRIG_TYPE_F16X8:
802 case BRIG_TYPE_F32X4:
803 case BRIG_TYPE_F64X2:
804 return BRIG_TYPE_B128;
806 default:
807 gcc_unreachable ();
811 /* Return the length of the BRIG type TYPE that is going to be streamed out as
812 an immediate constant (so it must not be B1). */
814 unsigned
815 hsa_get_imm_brig_type_len (BrigType16_t type)
817 BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
818 BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;
820 switch (pack_type)
822 case BRIG_TYPE_PACK_NONE:
823 break;
824 case BRIG_TYPE_PACK_32:
825 return 4;
826 case BRIG_TYPE_PACK_64:
827 return 8;
828 case BRIG_TYPE_PACK_128:
829 return 16;
830 default:
831 gcc_unreachable ();
834 switch (base_type)
836 case BRIG_TYPE_U8:
837 case BRIG_TYPE_S8:
838 case BRIG_TYPE_B8:
839 return 1;
840 case BRIG_TYPE_U16:
841 case BRIG_TYPE_S16:
842 case BRIG_TYPE_F16:
843 case BRIG_TYPE_B16:
844 return 2;
845 case BRIG_TYPE_U32:
846 case BRIG_TYPE_S32:
847 case BRIG_TYPE_F32:
848 case BRIG_TYPE_B32:
849 return 4;
850 case BRIG_TYPE_U64:
851 case BRIG_TYPE_S64:
852 case BRIG_TYPE_F64:
853 case BRIG_TYPE_B64:
854 return 8;
855 case BRIG_TYPE_B128:
856 return 16;
857 default:
858 gcc_unreachable ();
862 /* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
863 If NEED_LEN is not equal to zero, shrink or extend the value
864 to NEED_LEN bytes. Return how many bytes were written. */
866 static int
867 emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len)
869 union hsa_bytes bytes;
871 memset (&bytes, 0, sizeof (bytes));
872 tree type = TREE_TYPE (value);
873 gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);
875 unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
876 if (INTEGRAL_TYPE_P (type)
877 || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST))
878 switch (data_len)
880 case 1:
881 bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
882 break;
883 case 2:
884 bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
885 break;
886 case 4:
887 bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
888 break;
889 case 8:
890 bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value);
891 break;
892 default:
893 gcc_unreachable ();
895 else if (SCALAR_FLOAT_TYPE_P (type))
897 if (data_len == 2)
899 sorry ("Support for HSA does not implement immediate 16 bit FPU "
900 "operands");
901 return 2;
903 unsigned int_len = GET_MODE_SIZE (TYPE_MODE (type));
904 /* There are always 32 bits in each long, no matter the size of
905 the hosts long. */
906 long tmp[6];
908 real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));
910 if (int_len == 4)
911 bytes.b32 = (uint32_t) tmp[0];
912 else
914 bytes.b64 = (uint64_t)(uint32_t) tmp[1];
915 bytes.b64 <<= 32;
916 bytes.b64 |= (uint32_t) tmp[0];
919 else
920 gcc_unreachable ();
922 int len;
923 if (need_len == 0)
924 len = data_len;
925 else
926 len = need_len;
928 memcpy (data, &bytes, len);
929 return len;
932 char *
933 hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size)
935 char *brig_repr;
936 *brig_repr_size = hsa_get_imm_brig_type_len (m_type);
938 if (m_tree_value != NULL_TREE)
940 /* Update brig_repr_size for special tree values. */
941 if (TREE_CODE (m_tree_value) == STRING_CST)
942 *brig_repr_size = TREE_STRING_LENGTH (m_tree_value);
943 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
944 *brig_repr_size
945 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value)));
947 unsigned total_len = *brig_repr_size;
949 /* As we can have a constructor with fewer elements, fill the memory
950 with zeros. */
951 brig_repr = XCNEWVEC (char, total_len);
952 char *p = brig_repr;
954 if (TREE_CODE (m_tree_value) == VECTOR_CST)
956 int i, num = VECTOR_CST_NELTS (m_tree_value);
957 for (i = 0; i < num; i++)
959 tree v = VECTOR_CST_ELT (m_tree_value, i);
960 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
961 total_len -= actual;
962 p += actual;
964 /* Vectors should have the exact size. */
965 gcc_assert (total_len == 0);
967 else if (TREE_CODE (m_tree_value) == STRING_CST)
968 memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value),
969 TREE_STRING_LENGTH (m_tree_value));
970 else if (TREE_CODE (m_tree_value) == COMPLEX_CST)
972 gcc_assert (total_len % 2 == 0);
973 unsigned actual;
974 actual
975 = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p,
976 total_len / 2);
978 gcc_assert (actual == total_len / 2);
979 p += actual;
981 actual
982 = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p,
983 total_len / 2);
984 gcc_assert (actual == total_len / 2);
986 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
988 unsigned len = vec_safe_length (CONSTRUCTOR_ELTS (m_tree_value));
989 for (unsigned i = 0; i < len; i++)
991 tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
992 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
993 total_len -= actual;
994 p += actual;
997 else
998 emit_immediate_scalar_to_buffer (m_tree_value, p, total_len);
1000 else
1002 hsa_bytes bytes;
1004 switch (*brig_repr_size)
1006 case 1:
1007 bytes.b8 = (uint8_t) m_int_value;
1008 break;
1009 case 2:
1010 bytes.b16 = (uint16_t) m_int_value;
1011 break;
1012 case 4:
1013 bytes.b32 = (uint32_t) m_int_value;
1014 break;
1015 case 8:
1016 bytes.b64 = (uint64_t) m_int_value;
1017 break;
1018 default:
1019 gcc_unreachable ();
1022 brig_repr = XNEWVEC (char, *brig_repr_size);
1023 memcpy (brig_repr, &bytes, *brig_repr_size);
1026 return brig_repr;
1029 /* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might
1030 have been massaged to comply with various HSA/BRIG type requirements, so the
1031 only important aspect of that is the length (because HSAIL might expect
1032 smaller constants or become bit-data). The data should be represented
1033 according to what is in the tree representation. */
1035 static void
1036 emit_immediate_operand (hsa_op_immed *imm)
1038 unsigned brig_repr_size;
1039 char *brig_repr = imm->emit_to_buffer (&brig_repr_size);
1040 struct BrigOperandConstantBytes out;
1042 memset (&out, 0, sizeof (out));
1043 out.base.byteCount = lendian16 (sizeof (out));
1044 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
1045 uint32_t byteCount = lendian32 (brig_repr_size);
1046 out.type = lendian16 (imm->m_type);
1047 out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1048 brig_operand.add (&out, sizeof (out));
1049 brig_data.add (brig_repr, brig_repr_size);
1050 brig_data.round_size_up (4);
1052 free (brig_repr);
1055 /* Emit a register BRIG operand REG. */
1057 static void
1058 emit_register_operand (hsa_op_reg *reg)
1060 struct BrigOperandRegister out;
1062 out.base.byteCount = lendian16 (sizeof (out));
1063 out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER);
1064 out.regNum = lendian32 (reg->m_hard_num);
1066 switch (regtype_for_type (reg->m_type))
1068 case BRIG_TYPE_B32:
1069 out.regKind = BRIG_REGISTER_KIND_SINGLE;
1070 break;
1071 case BRIG_TYPE_B64:
1072 out.regKind = BRIG_REGISTER_KIND_DOUBLE;
1073 break;
1074 case BRIG_TYPE_B128:
1075 out.regKind = BRIG_REGISTER_KIND_QUAD;
1076 break;
1077 case BRIG_TYPE_B1:
1078 out.regKind = BRIG_REGISTER_KIND_CONTROL;
1079 break;
1080 default:
1081 gcc_unreachable ();
1084 brig_operand.add (&out, sizeof (out));
1087 /* Emit an address BRIG operand ADDR. */
1089 static void
1090 emit_address_operand (hsa_op_address *addr)
1092 struct BrigOperandAddress out;
1094 out.base.byteCount = lendian16 (sizeof (out));
1095 out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS);
1096 out.symbol = addr->m_symbol
1097 ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0;
1098 out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0;
1100 if (sizeof (addr->m_imm_offset) == 8)
1102 out.offset.lo = lendian32 (addr->m_imm_offset);
1103 out.offset.hi = lendian32 (addr->m_imm_offset >> 32);
1105 else
1107 gcc_assert (sizeof (addr->m_imm_offset) == 4);
1108 out.offset.lo = lendian32 (addr->m_imm_offset);
1109 out.offset.hi = 0;
1112 brig_operand.add (&out, sizeof (out));
1115 /* Emit a code reference operand REF. */
1117 static void
1118 emit_code_ref_operand (hsa_op_code_ref *ref)
1120 struct BrigOperandCodeRef out;
1122 out.base.byteCount = lendian16 (sizeof (out));
1123 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF);
1124 out.ref = lendian32 (ref->m_directive_offset);
1125 brig_operand.add (&out, sizeof (out));
1128 /* Emit a code list operand CODE_LIST. */
1130 static void
1131 emit_code_list_operand (hsa_op_code_list *code_list)
1133 struct BrigOperandCodeList out;
1134 unsigned args = code_list->m_offsets.length ();
1136 for (unsigned i = 0; i < args; i++)
1137 gcc_assert (code_list->m_offsets[i]);
1139 out.base.byteCount = lendian16 (sizeof (out));
1140 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST);
1142 uint32_t byteCount = lendian32 (4 * args);
1144 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1145 brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t));
1146 brig_data.round_size_up (4);
1147 brig_operand.add (&out, sizeof (out));
1150 /* Emit an operand list operand OPERAND_LIST. */
1152 static void
1153 emit_operand_list_operand (hsa_op_operand_list *operand_list)
1155 struct BrigOperandOperandList out;
1156 unsigned args = operand_list->m_offsets.length ();
1158 for (unsigned i = 0; i < args; i++)
1159 gcc_assert (operand_list->m_offsets[i]);
1161 out.base.byteCount = lendian16 (sizeof (out));
1162 out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST);
1164 uint32_t byteCount = lendian32 (4 * args);
1166 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1167 brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t));
1168 brig_data.round_size_up (4);
1169 brig_operand.add (&out, sizeof (out));
1172 /* Emit all operands queued for writing. */
1174 static void
1175 emit_queued_operands (void)
1177 for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next)
1179 gcc_assert (op->m_brig_op_offset == brig_operand.total_size);
1180 if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op))
1181 emit_immediate_operand (imm);
1182 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
1183 emit_register_operand (reg);
1184 else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op))
1185 emit_address_operand (addr);
1186 else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op))
1187 emit_code_ref_operand (ref);
1188 else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op))
1189 emit_code_list_operand (code_list);
1190 else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op))
1191 emit_operand_list_operand (l);
1192 else
1193 gcc_unreachable ();
1197 /* Emit directives describing the function that is used for
1198 a function declaration. */
1200 static BrigDirectiveExecutable *
1201 emit_function_declaration (tree decl)
1203 hsa_function_representation *f = hsa_generate_function_declaration (decl);
1205 BrigDirectiveExecutable *e = emit_function_directives (f, true);
1206 emit_queued_operands ();
1208 delete f;
1210 return e;
1213 /* Emit directives describing the function that is used for
1214 an internal function declaration. */
1216 static BrigDirectiveExecutable *
1217 emit_internal_fn_decl (hsa_internal_fn *fn)
1219 hsa_function_representation *f = hsa_generate_internal_fn_decl (fn);
1221 BrigDirectiveExecutable *e = emit_function_directives (f, true);
1222 emit_queued_operands ();
1224 delete f;
1226 return e;
1229 /* Enqueue all operands of INSN and return offset to BRIG data section
1230 to list of operand offsets. */
1232 static unsigned
1233 emit_insn_operands (hsa_insn_basic *insn)
1235 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1236 operand_offsets;
1238 unsigned l = insn->operand_count ();
1239 operand_offsets.safe_grow (l);
1241 for (unsigned i = 0; i < l; i++)
1242 operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i)));
1244 /* We have N operands so use 4 * N for the byte_count. */
1245 uint32_t byte_count = lendian32 (4 * l);
1247 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1248 brig_data.add (operand_offsets.address (),
1249 l * sizeof (BrigOperandOffset32_t));
1251 brig_data.round_size_up (4);
1253 return offset;
1256 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
1257 to BRIG data section to list of operand offsets. */
1259 static unsigned
1260 emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL,
1261 hsa_op_base *op2 = NULL)
1263 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1264 operand_offsets;
1266 gcc_checking_assert (op0 != NULL);
1267 operand_offsets.safe_push (enqueue_op (op0));
1269 if (op1 != NULL)
1271 operand_offsets.safe_push (enqueue_op (op1));
1272 if (op2 != NULL)
1273 operand_offsets.safe_push (enqueue_op (op2));
1276 unsigned l = operand_offsets.length ();
1278 /* We have N operands so use 4 * N for the byte_count. */
1279 uint32_t byte_count = lendian32 (4 * l);
1281 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1282 brig_data.add (operand_offsets.address (),
1283 l * sizeof (BrigOperandOffset32_t));
1285 brig_data.round_size_up (4);
1287 return offset;
1290 /* Emit an HSA memory instruction and all necessary directives, schedule
1291 necessary operands for writing. */
1293 static void
1294 emit_memory_insn (hsa_insn_mem *mem)
1296 struct BrigInstMem repr;
1297 gcc_checking_assert (mem->operand_count () == 2);
1299 hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1));
1301 /* This is necessary because of the erroneous typedef of
1302 BrigMemoryModifier8_t which introduces padding which may then contain
1303 random stuff (which we do not want so that we can test things don't
1304 change). */
1305 memset (&repr, 0, sizeof (repr));
1306 repr.base.base.byteCount = lendian16 (sizeof (repr));
1307 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1308 repr.base.opcode = lendian16 (mem->m_opcode);
1309 repr.base.type = lendian16 (mem->m_type);
1310 repr.base.operands = lendian32 (emit_insn_operands (mem));
1312 if (addr->m_symbol)
1313 repr.segment = addr->m_symbol->m_segment;
1314 else
1315 repr.segment = BRIG_SEGMENT_FLAT;
1316 repr.modifier = 0;
1317 repr.equivClass = mem->m_equiv_class;
1318 repr.align = mem->m_align;
1319 if (mem->m_opcode == BRIG_OPCODE_LD)
1320 repr.width = BRIG_WIDTH_1;
1321 else
1322 repr.width = BRIG_WIDTH_NONE;
1323 memset (&repr.reserved, 0, sizeof (repr.reserved));
1324 brig_code.add (&repr, sizeof (repr));
1325 brig_insn_count++;
1328 /* Emit an HSA signal memory instruction and all necessary directives, schedule
1329 necessary operands for writing. */
1331 static void
1332 emit_signal_insn (hsa_insn_signal *mem)
1334 struct BrigInstSignal repr;
1336 /* This is necessary because of the erroneous typedef of
1337 BrigMemoryModifier8_t which introduces padding which may then contain
1338 random stuff (which we do not want so that we can test things don't
1339 change). */
1340 memset (&repr, 0, sizeof (repr));
1341 repr.base.base.byteCount = lendian16 (sizeof (repr));
1342 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL);
1343 repr.base.opcode = lendian16 (mem->m_opcode);
1344 repr.base.type = lendian16 (mem->m_type);
1345 repr.base.operands = lendian32 (emit_insn_operands (mem));
1347 repr.memoryOrder = mem->m_memoryorder;
1348 repr.signalOperation = mem->m_atomicop;
1349 repr.signalType = BRIG_TYPE_SIG64;
1351 brig_code.add (&repr, sizeof (repr));
1352 brig_insn_count++;
1355 /* Emit an HSA atomic memory instruction and all necessary directives, schedule
1356 necessary operands for writing. */
1358 static void
1359 emit_atomic_insn (hsa_insn_atomic *mem)
1361 struct BrigInstAtomic repr;
1363 /* Either operand[0] or operand[1] must be an address operand. */
1364 hsa_op_address *addr = NULL;
1365 if (is_a <hsa_op_address *> (mem->get_op (0)))
1366 addr = as_a <hsa_op_address *> (mem->get_op (0));
1367 else
1368 addr = as_a <hsa_op_address *> (mem->get_op (1));
1370 /* This is necessary because of the erroneous typedef of
1371 BrigMemoryModifier8_t which introduces padding which may then contain
1372 random stuff (which we do not want so that we can test things don't
1373 change). */
1374 memset (&repr, 0, sizeof (repr));
1375 repr.base.base.byteCount = lendian16 (sizeof (repr));
1376 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC);
1377 repr.base.opcode = lendian16 (mem->m_opcode);
1378 repr.base.type = lendian16 (mem->m_type);
1379 repr.base.operands = lendian32 (emit_insn_operands (mem));
1381 if (addr->m_symbol)
1382 repr.segment = addr->m_symbol->m_segment;
1383 else
1384 repr.segment = BRIG_SEGMENT_FLAT;
1385 repr.memoryOrder = mem->m_memoryorder;
1386 repr.memoryScope = mem->m_memoryscope;
1387 repr.atomicOperation = mem->m_atomicop;
1389 brig_code.add (&repr, sizeof (repr));
1390 brig_insn_count++;
1393 /* Emit an HSA LDA instruction and all necessary directives, schedule
1394 necessary operands for writing. */
1396 static void
1397 emit_addr_insn (hsa_insn_basic *insn)
1399 struct BrigInstAddr repr;
1401 hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1));
1403 repr.base.base.byteCount = lendian16 (sizeof (repr));
1404 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR);
1405 repr.base.opcode = lendian16 (insn->m_opcode);
1406 repr.base.type = lendian16 (insn->m_type);
1407 repr.base.operands = lendian32 (emit_insn_operands (insn));
1409 if (addr->m_symbol)
1410 repr.segment = addr->m_symbol->m_segment;
1411 else
1412 repr.segment = BRIG_SEGMENT_FLAT;
1413 memset (&repr.reserved, 0, sizeof (repr.reserved));
1415 brig_code.add (&repr, sizeof (repr));
1416 brig_insn_count++;
1419 /* Emit an HSA segment conversion instruction and all necessary directives,
1420 schedule necessary operands for writing. */
1422 static void
1423 emit_segment_insn (hsa_insn_seg *seg)
1425 struct BrigInstSegCvt repr;
1427 repr.base.base.byteCount = lendian16 (sizeof (repr));
1428 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT);
1429 repr.base.opcode = lendian16 (seg->m_opcode);
1430 repr.base.type = lendian16 (seg->m_type);
1431 repr.base.operands = lendian32 (emit_insn_operands (seg));
1432 repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type);
1433 repr.segment = seg->m_segment;
1434 repr.modifier = 0;
1436 brig_code.add (&repr, sizeof (repr));
1438 brig_insn_count++;
1441 /* Emit an HSA alloca instruction and all necessary directives,
1442 schedule necessary operands for writing. */
1444 static void
1445 emit_alloca_insn (hsa_insn_alloca *alloca)
1447 struct BrigInstMem repr;
1448 gcc_checking_assert (alloca->operand_count () == 2);
1450 /* This is necessary because of the erroneous typedef of
1451 BrigMemoryModifier8_t which introduces padding which may then contain
1452 random stuff (which we do not want so that we can test things don't
1453 change). */
1454 memset (&repr, 0, sizeof (repr));
1455 repr.base.base.byteCount = lendian16 (sizeof (repr));
1456 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1457 repr.base.opcode = lendian16 (alloca->m_opcode);
1458 repr.base.type = lendian16 (alloca->m_type);
1459 repr.base.operands = lendian32 (emit_insn_operands (alloca));
1460 repr.segment = BRIG_SEGMENT_PRIVATE;
1461 repr.modifier = 0;
1462 repr.equivClass = 0;
1463 repr.align = alloca->m_align;
1464 repr.width = BRIG_WIDTH_NONE;
1465 memset (&repr.reserved, 0, sizeof (repr.reserved));
1466 brig_code.add (&repr, sizeof (repr));
1467 brig_insn_count++;
1470 /* Emit an HSA comparison instruction and all necessary directives,
1471 schedule necessary operands for writing. */
1473 static void
1474 emit_cmp_insn (hsa_insn_cmp *cmp)
1476 struct BrigInstCmp repr;
1478 memset (&repr, 0, sizeof (repr));
1479 repr.base.base.byteCount = lendian16 (sizeof (repr));
1480 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP);
1481 repr.base.opcode = lendian16 (cmp->m_opcode);
1482 repr.base.type = lendian16 (cmp->m_type);
1483 repr.base.operands = lendian32 (emit_insn_operands (cmp));
1485 if (is_a <hsa_op_reg *> (cmp->get_op (1)))
1486 repr.sourceType
1487 = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type);
1488 else
1489 repr.sourceType
1490 = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type);
1491 repr.modifier = 0;
1492 repr.compare = cmp->m_compare;
1493 repr.pack = 0;
1495 brig_code.add (&repr, sizeof (repr));
1496 brig_insn_count++;
1499 /* Emit an HSA branching instruction and all necessary directives, schedule
1500 necessary operands for writing. */
1502 static void
1503 emit_branch_insn (hsa_insn_br *br)
1505 struct BrigInstBr repr;
1507 basic_block target = NULL;
1508 edge_iterator ei;
1509 edge e;
1511 /* At the moment we only handle direct conditional jumps. */
1512 gcc_assert (br->m_opcode == BRIG_OPCODE_CBR);
1513 repr.base.base.byteCount = lendian16 (sizeof (repr));
1514 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1515 repr.base.opcode = lendian16 (br->m_opcode);
1516 repr.width = BRIG_WIDTH_1;
1517 /* For Conditional jumps the type is always B1. */
1518 repr.base.type = lendian16 (BRIG_TYPE_B1);
1520 FOR_EACH_EDGE (e, ei, br->m_bb->succs)
1521 if (e->flags & EDGE_TRUE_VALUE)
1523 target = e->dest;
1524 break;
1526 gcc_assert (target);
1528 repr.base.operands
1529 = lendian32 (emit_operands (br->get_op (0),
1530 &hsa_bb_for_bb (target)->m_label_ref));
1531 memset (&repr.reserved, 0, sizeof (repr.reserved));
1533 brig_code.add (&repr, sizeof (repr));
1534 brig_insn_count++;
1537 /* Emit an HSA unconditional jump branching instruction that points to
1538 a label REFERENCE. */
1540 static void
1541 emit_unconditional_jump (hsa_op_code_ref *reference)
1543 struct BrigInstBr repr;
1545 repr.base.base.byteCount = lendian16 (sizeof (repr));
1546 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1547 repr.base.opcode = lendian16 (BRIG_OPCODE_BR);
1548 repr.base.type = lendian16 (BRIG_TYPE_NONE);
1549 /* Direct branches to labels must be width(all). */
1550 repr.width = BRIG_WIDTH_ALL;
1552 repr.base.operands = lendian32 (emit_operands (reference));
1553 memset (&repr.reserved, 0, sizeof (repr.reserved));
1554 brig_code.add (&repr, sizeof (repr));
1555 brig_insn_count++;
1558 /* Emit an HSA switch jump instruction that uses a jump table to
1559 jump to a destination label. */
1561 static void
1562 emit_switch_insn (hsa_insn_sbr *sbr)
1564 struct BrigInstBr repr;
1566 gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR);
1567 repr.base.base.byteCount = lendian16 (sizeof (repr));
1568 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1569 repr.base.opcode = lendian16 (sbr->m_opcode);
1570 repr.width = BRIG_WIDTH_1;
1571 /* For Conditional jumps the type is always B1. */
1572 hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0));
1573 repr.base.type = lendian16 (index->m_type);
1574 repr.base.operands
1575 = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list));
1576 memset (&repr.reserved, 0, sizeof (repr.reserved));
1578 brig_code.add (&repr, sizeof (repr));
1579 brig_insn_count++;
1581 /* Emit jump to default label. */
1582 hsa_bb *hbb = hsa_bb_for_bb (sbr->m_default_bb);
1583 emit_unconditional_jump (&hbb->m_label_ref);
1586 /* Emit a HSA convert instruction and all necessary directives, schedule
1587 necessary operands for writing. */
1589 static void
1590 emit_cvt_insn (hsa_insn_cvt *insn)
1592 struct BrigInstCvt repr;
1593 BrigType16_t srctype;
1595 repr.base.base.byteCount = lendian16 (sizeof (repr));
1596 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT);
1597 repr.base.opcode = lendian16 (insn->m_opcode);
1598 repr.base.type = lendian16 (insn->m_type);
1599 repr.base.operands = lendian32 (emit_insn_operands (insn));
1601 if (is_a <hsa_op_reg *> (insn->get_op (1)))
1602 srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type;
1603 else
1604 srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type;
1605 repr.sourceType = lendian16 (srctype);
1606 repr.modifier = 0;
1607 /* float to smaller float requires a rounding setting (we default
1608 to 'near'. */
1609 if (hsa_type_float_p (insn->m_type)
1610 && (!hsa_type_float_p (srctype)
1611 || ((insn->m_type & BRIG_TYPE_BASE_MASK)
1612 < (srctype & BRIG_TYPE_BASE_MASK))))
1613 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1614 else if (hsa_type_integer_p (insn->m_type) &&
1615 hsa_type_float_p (srctype))
1616 repr.round = BRIG_ROUND_INTEGER_ZERO;
1617 else
1618 repr.round = BRIG_ROUND_NONE;
1619 brig_code.add (&repr, sizeof (repr));
1620 brig_insn_count++;
1623 /* Emit call instruction INSN, where this instruction must be closed
1624 within a call block instruction. */
1626 static void
1627 emit_call_insn (hsa_insn_call *call)
1629 struct BrigInstBr repr;
1631 repr.base.base.byteCount = lendian16 (sizeof (repr));
1632 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1633 repr.base.opcode = lendian16 (BRIG_OPCODE_CALL);
1634 repr.base.type = lendian16 (BRIG_TYPE_NONE);
1636 repr.base.operands
1637 = lendian32 (emit_operands (call->m_result_code_list, &call->m_func,
1638 call->m_args_code_list));
1640 /* Internal functions have not set m_called_function. */
1641 if (call->m_called_function)
1643 function_linkage_pair pair (call->m_called_function,
1644 call->m_func.m_brig_op_offset);
1645 function_call_linkage.safe_push (pair);
1647 else
1649 hsa_internal_fn *slot
1650 = hsa_emitted_internal_decls->find (call->m_called_internal_fn);
1651 gcc_assert (slot);
1652 gcc_assert (slot->m_offset > 0);
1653 call->m_func.m_directive_offset = slot->m_offset;
1656 repr.width = BRIG_WIDTH_ALL;
1657 memset (&repr.reserved, 0, sizeof (repr.reserved));
1659 brig_code.add (&repr, sizeof (repr));
1660 brig_insn_count++;
1663 /* Emit argument block directive. */
1665 static void
1666 emit_arg_block_insn (hsa_insn_arg_block *insn)
1668 switch (insn->m_kind)
1670 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
1672 struct BrigDirectiveArgBlock repr;
1673 repr.base.byteCount = lendian16 (sizeof (repr));
1674 repr.base.kind = lendian16 (insn->m_kind);
1675 brig_code.add (&repr, sizeof (repr));
1677 for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++)
1679 insn->m_call_insn->m_args_code_list->m_offsets[i]
1680 = lendian32 (emit_directive_variable
1681 (insn->m_call_insn->m_input_args[i]));
1682 brig_insn_count++;
1685 if (insn->m_call_insn->m_output_arg)
1687 insn->m_call_insn->m_result_code_list->m_offsets[0]
1688 = lendian32 (emit_directive_variable
1689 (insn->m_call_insn->m_output_arg));
1690 brig_insn_count++;
1693 break;
1695 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
1697 struct BrigDirectiveArgBlock repr;
1698 repr.base.byteCount = lendian16 (sizeof (repr));
1699 repr.base.kind = lendian16 (insn->m_kind);
1700 brig_code.add (&repr, sizeof (repr));
1701 break;
1703 default:
1704 gcc_unreachable ();
1707 brig_insn_count++;
1710 /* Emit comment directive. */
1712 static void
1713 emit_comment_insn (hsa_insn_comment *insn)
1715 struct BrigDirectiveComment repr;
1716 memset (&repr, 0, sizeof (repr));
1718 repr.base.byteCount = lendian16 (sizeof (repr));
1719 repr.base.kind = lendian16 (insn->m_opcode);
1720 repr.name = brig_emit_string (insn->m_comment, '\0', false);
1721 brig_code.add (&repr, sizeof (repr));
1724 /* Emit queue instruction INSN. */
1726 static void
1727 emit_queue_insn (hsa_insn_queue *insn)
1729 BrigInstQueue repr;
1730 memset (&repr, 0, sizeof (repr));
1732 repr.base.base.byteCount = lendian16 (sizeof (repr));
1733 repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE);
1734 repr.base.opcode = lendian16 (insn->m_opcode);
1735 repr.base.type = lendian16 (insn->m_type);
1736 repr.segment = BRIG_SEGMENT_GLOBAL;
1737 repr.memoryOrder = BRIG_MEMORY_ORDER_SC_RELEASE;
1738 repr.base.operands = lendian32 (emit_insn_operands (insn));
1739 brig_data.round_size_up (4);
1740 brig_code.add (&repr, sizeof (repr));
1742 brig_insn_count++;
1745 /* Emit source type instruction INSN. */
1747 static void
1748 emit_srctype_insn (hsa_insn_srctype *insn)
1750 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1751 struct BrigInstSourceType repr;
1752 unsigned operand_count = insn->operand_count ();
1753 gcc_checking_assert (operand_count >= 2);
1755 memset (&repr, 0, sizeof (repr));
1756 repr.sourceType = lendian16 (insn->m_source_type);
1757 repr.base.base.byteCount = lendian16 (sizeof (repr));
1758 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1759 repr.base.opcode = lendian16 (insn->m_opcode);
1760 repr.base.type = lendian16 (insn->m_type);
1762 repr.base.operands = lendian32 (emit_insn_operands (insn));
1763 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1764 brig_insn_count++;
1767 /* Emit packed instruction INSN. */
1769 static void
1770 emit_packed_insn (hsa_insn_packed *insn)
1772 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1773 struct BrigInstSourceType repr;
1774 unsigned operand_count = insn->operand_count ();
1775 gcc_checking_assert (operand_count >= 2);
1777 memset (&repr, 0, sizeof (repr));
1778 repr.sourceType = lendian16 (insn->m_source_type);
1779 repr.base.base.byteCount = lendian16 (sizeof (repr));
1780 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1781 repr.base.opcode = lendian16 (insn->m_opcode);
1782 repr.base.type = lendian16 (insn->m_type);
1784 if (insn->m_opcode == BRIG_OPCODE_COMBINE)
1786 /* Create operand list for packed type. */
1787 for (unsigned i = 1; i < operand_count; i++)
1789 gcc_checking_assert (insn->get_op (i));
1790 insn->m_operand_list->m_offsets[i - 1]
1791 = lendian32 (enqueue_op (insn->get_op (i)));
1794 repr.base.operands = lendian32 (emit_operands (insn->get_op (0),
1795 insn->m_operand_list));
1797 else if (insn->m_opcode == BRIG_OPCODE_EXPAND)
1799 /* Create operand list for packed type. */
1800 for (unsigned i = 0; i < operand_count - 1; i++)
1802 gcc_checking_assert (insn->get_op (i));
1803 insn->m_operand_list->m_offsets[i]
1804 = lendian32 (enqueue_op (insn->get_op (i)));
1807 unsigned ops = emit_operands (insn->m_operand_list,
1808 insn->get_op (insn->operand_count () - 1));
1809 repr.base.operands = lendian32 (ops);
1813 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1814 brig_insn_count++;
1817 /* Emit a basic HSA instruction and all necessary directives, schedule
1818 necessary operands for writing. */
1820 static void
1821 emit_basic_insn (hsa_insn_basic *insn)
1823 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1824 struct BrigInstMod repr;
1825 BrigType16_t type;
1827 memset (&repr, 0, sizeof (repr));
1828 repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic));
1829 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC);
1830 repr.base.opcode = lendian16 (insn->m_opcode);
1831 switch (insn->m_opcode)
1833 /* And the bit-logical operations need bit types and whine about
1834 arithmetic types :-/ */
1835 case BRIG_OPCODE_AND:
1836 case BRIG_OPCODE_OR:
1837 case BRIG_OPCODE_XOR:
1838 case BRIG_OPCODE_NOT:
1839 type = regtype_for_type (insn->m_type);
1840 break;
1841 default:
1842 type = insn->m_type;
1843 break;
1845 repr.base.type = lendian16 (type);
1846 repr.base.operands = lendian32 (emit_insn_operands (insn));
1848 if (hsa_type_packed_p (type))
1850 if (hsa_type_float_p (type)
1851 && !hsa_opcode_floating_bit_insn_p (insn->m_opcode))
1852 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1853 else
1854 repr.round = 0;
1855 /* We assume that destination and sources agree in packing layout. */
1856 if (insn->num_used_ops () >= 2)
1857 repr.pack = BRIG_PACK_PP;
1858 else
1859 repr.pack = BRIG_PACK_P;
1860 repr.reserved = 0;
1861 repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod));
1862 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD);
1863 brig_code.add (&repr, sizeof (struct BrigInstMod));
1865 else
1866 brig_code.add (&repr, sizeof (struct BrigInstBasic));
1867 brig_insn_count++;
1870 /* Emit an HSA instruction and all necessary directives, schedule necessary
1871 operands for writing. */
1873 static void
1874 emit_insn (hsa_insn_basic *insn)
1876 gcc_assert (!is_a <hsa_insn_phi *> (insn));
1878 insn->m_brig_offset = brig_code.total_size;
1880 if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn))
1881 emit_signal_insn (signal);
1882 else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn))
1883 emit_atomic_insn (atom);
1884 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
1885 emit_memory_insn (mem);
1886 else if (insn->m_opcode == BRIG_OPCODE_LDA)
1887 emit_addr_insn (insn);
1888 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
1889 emit_segment_insn (seg);
1890 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
1891 emit_cmp_insn (cmp);
1892 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
1893 emit_branch_insn (br);
1894 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
1896 if (switch_instructions == NULL)
1897 switch_instructions = new vec <hsa_insn_sbr *> ();
1899 switch_instructions->safe_push (sbr);
1900 emit_switch_insn (sbr);
1902 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
1903 emit_arg_block_insn (block);
1904 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
1905 emit_call_insn (call);
1906 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
1907 emit_comment_insn (comment);
1908 else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn))
1909 emit_queue_insn (queue);
1910 else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn))
1911 emit_srctype_insn (srctype);
1912 else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn))
1913 emit_packed_insn (packed);
1914 else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn))
1915 emit_cvt_insn (cvt);
1916 else if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn))
1917 emit_alloca_insn (alloca);
1918 else
1919 emit_basic_insn (insn);
1922 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
1923 or we are about to finish emitting code, if it is NULL. If the fall through
1924 edge from BB does not lead to NEXT_BB, emit an unconditional jump. */
1926 static void
1927 perhaps_emit_branch (basic_block bb, basic_block next_bb)
1929 basic_block t_bb = NULL, ff = NULL;
1931 edge_iterator ei;
1932 edge e;
1934 /* If the last instruction of BB is a switch, ignore emission of all
1935 edges. */
1936 if (hsa_bb_for_bb (bb)->m_last_insn
1937 && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn))
1938 return;
1940 FOR_EACH_EDGE (e, ei, bb->succs)
1941 if (e->flags & EDGE_TRUE_VALUE)
1943 gcc_assert (!t_bb);
1944 t_bb = e->dest;
1946 else
1948 gcc_assert (!ff);
1949 ff = e->dest;
1952 if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun))
1953 return;
1955 emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref);
1958 /* Emit the a function with name NAME to the various brig sections. */
1960 void
1961 hsa_brig_emit_function (void)
1963 basic_block bb, prev_bb;
1964 hsa_insn_basic *insn;
1965 BrigDirectiveExecutable *ptr_to_fndir;
1967 brig_init ();
1969 brig_insn_count = 0;
1970 memset (&op_queue, 0, sizeof (op_queue));
1971 op_queue.projected_size = brig_operand.total_size;
1973 if (!function_offsets)
1974 function_offsets = new hash_map<tree, BrigCodeOffset32_t> ();
1976 if (!emitted_declarations)
1977 emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> ();
1979 for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++)
1981 tree called = hsa_cfun->m_called_functions[i];
1983 /* If the function has no definition, emit a declaration. */
1984 if (!emitted_declarations->get (called))
1986 BrigDirectiveExecutable *e = emit_function_declaration (called);
1987 emitted_declarations->put (called, e);
1991 for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++)
1993 hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i];
1994 emit_internal_fn_decl (called);
1997 ptr_to_fndir = emit_function_directives (hsa_cfun, false);
1998 for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn;
1999 insn;
2000 insn = insn->m_next)
2001 emit_insn (insn);
2002 prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2003 FOR_EACH_BB_FN (bb, cfun)
2005 perhaps_emit_branch (prev_bb, bb);
2006 emit_bb_label_directive (hsa_bb_for_bb (bb));
2007 for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next)
2008 emit_insn (insn);
2009 prev_bb = bb;
2011 perhaps_emit_branch (prev_bb, NULL);
2012 ptr_to_fndir->nextModuleEntry = brig_code.total_size;
2014 /* Fill up label references for all sbr instructions. */
2015 if (switch_instructions)
2017 for (unsigned i = 0; i < switch_instructions->length (); i++)
2019 hsa_insn_sbr *sbr = (*switch_instructions)[i];
2020 for (unsigned j = 0; j < sbr->m_jump_table.length (); j++)
2022 hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]);
2023 sbr->m_label_code_list->m_offsets[j]
2024 = hbb->m_label_ref.m_directive_offset;
2028 switch_instructions->release ();
2029 delete switch_instructions;
2030 switch_instructions = NULL;
2033 if (dump_file)
2035 fprintf (dump_file, "------- After BRIG emission: -------\n");
2036 dump_hsa_cfun (dump_file);
2039 emit_queued_operands ();
2042 /* Emit all OMP symbols related to OMP. */
2044 void
2045 hsa_brig_emit_omp_symbols (void)
2047 brig_init ();
2048 emit_directive_variable (hsa_num_threads);
2051 /* Create and return __hsa_global_variables symbol that contains
2052 all informations consumed by libgomp to link global variables
2053 with their string names used by an HSA kernel. */
2055 static tree
2056 hsa_output_global_variables ()
2058 unsigned l = hsa_global_variable_symbols->elements ();
2060 tree variable_info_type = make_node (RECORD_TYPE);
2061 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2062 get_identifier ("name"), ptr_type_node);
2063 DECL_CHAIN (id_f1) = NULL_TREE;
2064 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2065 get_identifier ("omp_data_size"),
2066 ptr_type_node);
2067 DECL_CHAIN (id_f2) = id_f1;
2068 finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2,
2069 NULL_TREE);
2071 tree int_num_of_global_vars;
2072 int_num_of_global_vars = build_int_cst (uint32_type_node, l);
2073 tree global_vars_num_index_type = build_index_type (int_num_of_global_vars);
2074 tree global_vars_array_type = build_array_type (variable_info_type,
2075 global_vars_num_index_type);
2076 TYPE_ARTIFICIAL (global_vars_array_type) = 1;
2078 vec<constructor_elt, va_gc> *global_vars_vec = NULL;
2080 for (hash_table <hsa_noop_symbol_hasher>::iterator it
2081 = hsa_global_variable_symbols->begin ();
2082 it != hsa_global_variable_symbols->end (); ++it)
2084 unsigned len = strlen ((*it)->m_name);
2085 char *copy = XNEWVEC (char, len + 2);
2086 copy[0] = '&';
2087 memcpy (copy + 1, (*it)->m_name, len);
2088 copy[len + 1] = '\0';
2089 len++;
2090 hsa_sanitize_name (copy);
2092 tree var_name = build_string (len, copy);
2093 TREE_TYPE (var_name)
2094 = build_array_type (char_type_node, build_index_type (size_int (len)));
2095 free (copy);
2097 vec<constructor_elt, va_gc> *variable_info_vec = NULL;
2098 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2099 build1 (ADDR_EXPR,
2100 build_pointer_type (TREE_TYPE (var_name)),
2101 var_name));
2102 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2103 build_fold_addr_expr ((*it)->m_decl));
2105 tree variable_info_ctor = build_constructor (variable_info_type,
2106 variable_info_vec);
2108 CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE,
2109 variable_info_ctor);
2112 tree global_vars_ctor = build_constructor (global_vars_array_type,
2113 global_vars_vec);
2115 char tmp_name[64];
2116 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1);
2117 tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2118 get_identifier (tmp_name),
2119 global_vars_array_type);
2120 TREE_STATIC (global_vars_table) = 1;
2121 TREE_READONLY (global_vars_table) = 1;
2122 TREE_PUBLIC (global_vars_table) = 0;
2123 DECL_ARTIFICIAL (global_vars_table) = 1;
2124 DECL_IGNORED_P (global_vars_table) = 1;
2125 DECL_EXTERNAL (global_vars_table) = 0;
2126 TREE_CONSTANT (global_vars_table) = 1;
2127 DECL_INITIAL (global_vars_table) = global_vars_ctor;
2128 varpool_node::finalize_decl (global_vars_table);
2130 return global_vars_table;
2133 /* Create __hsa_host_functions and __hsa_kernels that contain
2134 all informations consumed by libgomp to register all kernels
2135 in the BRIG binary. */
2137 static void
2138 hsa_output_kernels (tree *host_func_table, tree *kernels)
2140 unsigned map_count = hsa_get_number_decl_kernel_mappings ();
2142 tree int_num_of_kernels;
2143 int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
2144 tree kernel_num_index_type = build_index_type (int_num_of_kernels);
2145 tree host_functions_array_type = build_array_type (ptr_type_node,
2146 kernel_num_index_type);
2147 TYPE_ARTIFICIAL (host_functions_array_type) = 1;
2149 vec<constructor_elt, va_gc> *host_functions_vec = NULL;
2150 for (unsigned i = 0; i < map_count; ++i)
2152 tree decl = hsa_get_decl_kernel_mapping_decl (i);
2153 tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl));
2154 CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
2156 tree host_functions_ctor = build_constructor (host_functions_array_type,
2157 host_functions_vec);
2158 char tmp_name[64];
2159 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
2160 tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2161 get_identifier (tmp_name),
2162 host_functions_array_type);
2163 TREE_STATIC (hsa_host_func_table) = 1;
2164 TREE_READONLY (hsa_host_func_table) = 1;
2165 TREE_PUBLIC (hsa_host_func_table) = 0;
2166 DECL_ARTIFICIAL (hsa_host_func_table) = 1;
2167 DECL_IGNORED_P (hsa_host_func_table) = 1;
2168 DECL_EXTERNAL (hsa_host_func_table) = 0;
2169 TREE_CONSTANT (hsa_host_func_table) = 1;
2170 DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
2171 varpool_node::finalize_decl (hsa_host_func_table);
2172 *host_func_table = hsa_host_func_table;
2174 /* Following code emits list of kernel_info structures. */
2176 tree kernel_info_type = make_node (RECORD_TYPE);
2177 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2178 get_identifier ("name"), ptr_type_node);
2179 DECL_CHAIN (id_f1) = NULL_TREE;
2180 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2181 get_identifier ("omp_data_size"),
2182 unsigned_type_node);
2183 DECL_CHAIN (id_f2) = id_f1;
2184 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2185 get_identifier ("gridified_kernel_p"),
2186 boolean_type_node);
2187 DECL_CHAIN (id_f3) = id_f2;
2188 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2189 get_identifier ("kernel_dependencies_count"),
2190 unsigned_type_node);
2191 DECL_CHAIN (id_f4) = id_f3;
2192 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2193 get_identifier ("kernel_dependencies"),
2194 build_pointer_type (build_pointer_type
2195 (char_type_node)));
2196 DECL_CHAIN (id_f5) = id_f4;
2197 finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5,
2198 NULL_TREE);
2200 int_num_of_kernels = build_int_cstu (uint32_type_node, map_count);
2201 tree kernel_info_vector_type
2202 = build_array_type (kernel_info_type,
2203 build_index_type (int_num_of_kernels));
2204 TYPE_ARTIFICIAL (kernel_info_vector_type) = 1;
2206 vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL;
2207 tree kernel_dependencies_vector_type = NULL;
2209 for (unsigned i = 0; i < map_count; ++i)
2211 tree kernel = hsa_get_decl_kernel_mapping_decl (i);
2212 char *name = hsa_get_decl_kernel_mapping_name (i);
2213 unsigned len = strlen (name);
2214 char *copy = XNEWVEC (char, len + 2);
2215 copy[0] = '&';
2216 memcpy (copy + 1, name, len);
2217 copy[len + 1] = '\0';
2218 len++;
2220 tree kern_name = build_string (len, copy);
2221 TREE_TYPE (kern_name)
2222 = build_array_type (char_type_node, build_index_type (size_int (len)));
2223 free (copy);
2225 unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i);
2226 tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size);
2227 bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i);
2228 tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
2229 gridified_kernel_p);
2230 unsigned count = 0;
2232 kernel_dependencies_vector_type
2233 = build_array_type (build_pointer_type (char_type_node),
2234 build_index_type (size_int (0)));
2236 vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
2237 if (hsa_decl_kernel_dependencies)
2239 vec<const char *> **slot;
2240 slot = hsa_decl_kernel_dependencies->get (kernel);
2241 if (slot)
2243 vec <const char *> *dependencies = *slot;
2244 count = dependencies->length ();
2246 kernel_dependencies_vector_type
2247 = build_array_type (build_pointer_type (char_type_node),
2248 build_index_type (size_int (count)));
2249 TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1;
2251 for (unsigned j = 0; j < count; j++)
2253 const char *d = (*dependencies)[j];
2254 len = strlen (d);
2255 tree dependency_name = build_string (len, d);
2256 TREE_TYPE (dependency_name)
2257 = build_array_type (char_type_node,
2258 build_index_type (size_int (len)));
2260 CONSTRUCTOR_APPEND_ELT
2261 (kernel_dependencies_vec, NULL_TREE,
2262 build1 (ADDR_EXPR,
2263 build_pointer_type (TREE_TYPE (dependency_name)),
2264 dependency_name));
2269 tree dependencies_count = build_int_cstu (unsigned_type_node, count);
2271 vec<constructor_elt, va_gc> *kernel_info_vec = NULL;
2272 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2273 build1 (ADDR_EXPR,
2274 build_pointer_type (TREE_TYPE
2275 (kern_name)),
2276 kern_name));
2277 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size);
2278 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2279 gridified_kernel_p_tree);
2280 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count);
2282 if (count > 0)
2284 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
2285 tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2286 get_identifier (tmp_name),
2287 kernel_dependencies_vector_type);
2289 TREE_STATIC (dependencies_list) = 1;
2290 TREE_READONLY (dependencies_list) = 1;
2291 TREE_PUBLIC (dependencies_list) = 0;
2292 DECL_ARTIFICIAL (dependencies_list) = 1;
2293 DECL_IGNORED_P (dependencies_list) = 1;
2294 DECL_EXTERNAL (dependencies_list) = 0;
2295 TREE_CONSTANT (dependencies_list) = 1;
2296 DECL_INITIAL (dependencies_list)
2297 = build_constructor (kernel_dependencies_vector_type,
2298 kernel_dependencies_vec);
2299 varpool_node::finalize_decl (dependencies_list);
2301 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2302 build1 (ADDR_EXPR,
2303 build_pointer_type
2304 (TREE_TYPE (dependencies_list)),
2305 dependencies_list));
2307 else
2308 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node);
2310 tree kernel_info_ctor = build_constructor (kernel_info_type,
2311 kernel_info_vec);
2313 CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE,
2314 kernel_info_ctor);
2317 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1);
2318 tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2319 get_identifier (tmp_name),
2320 kernel_info_vector_type);
2322 TREE_STATIC (hsa_kernels) = 1;
2323 TREE_READONLY (hsa_kernels) = 1;
2324 TREE_PUBLIC (hsa_kernels) = 0;
2325 DECL_ARTIFICIAL (hsa_kernels) = 1;
2326 DECL_IGNORED_P (hsa_kernels) = 1;
2327 DECL_EXTERNAL (hsa_kernels) = 0;
2328 TREE_CONSTANT (hsa_kernels) = 1;
2329 DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type,
2330 kernel_info_vector_vec);
2331 varpool_node::finalize_decl (hsa_kernels);
2332 *kernels = hsa_kernels;
2335 /* Create a static constructor that will register out brig stuff with
2336 libgomp. */
2338 static void
2339 hsa_output_libgomp_mapping (tree brig_decl)
2341 unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
2342 unsigned global_variable_count = hsa_global_variable_symbols->elements ();
2344 tree kernels;
2345 tree host_func_table;
2347 hsa_output_kernels (&host_func_table, &kernels);
2348 tree global_vars = hsa_output_global_variables ();
2350 tree hsa_image_desc_type = make_node (RECORD_TYPE);
2351 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2352 get_identifier ("brig_module"), ptr_type_node);
2353 DECL_CHAIN (id_f1) = NULL_TREE;
2354 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2355 get_identifier ("kernel_count"),
2356 unsigned_type_node);
2358 DECL_CHAIN (id_f2) = id_f1;
2359 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2360 get_identifier ("hsa_kernel_infos"),
2361 ptr_type_node);
2362 DECL_CHAIN (id_f3) = id_f2;
2363 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2364 get_identifier ("global_variable_count"),
2365 unsigned_type_node);
2366 DECL_CHAIN (id_f4) = id_f3;
2367 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2368 get_identifier ("hsa_global_variable_infos"),
2369 ptr_type_node);
2370 DECL_CHAIN (id_f5) = id_f4;
2371 finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5,
2372 NULL_TREE);
2373 TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;
2375 vec<constructor_elt, va_gc> *img_desc_vec = NULL;
2376 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2377 build_fold_addr_expr (brig_decl));
2378 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2379 build_int_cstu (unsigned_type_node, kernel_count));
2380 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2381 build1 (ADDR_EXPR,
2382 build_pointer_type (TREE_TYPE (kernels)),
2383 kernels));
2384 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2385 build_int_cstu (unsigned_type_node,
2386 global_variable_count));
2387 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2388 build1 (ADDR_EXPR,
2389 build_pointer_type (TREE_TYPE (global_vars)),
2390 global_vars));
2392 tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
2394 char tmp_name[64];
2395 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1);
2396 tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2397 get_identifier (tmp_name),
2398 hsa_image_desc_type);
2399 TREE_STATIC (hsa_img_descriptor) = 1;
2400 TREE_READONLY (hsa_img_descriptor) = 1;
2401 TREE_PUBLIC (hsa_img_descriptor) = 0;
2402 DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
2403 DECL_IGNORED_P (hsa_img_descriptor) = 1;
2404 DECL_EXTERNAL (hsa_img_descriptor) = 0;
2405 TREE_CONSTANT (hsa_img_descriptor) = 1;
2406 DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
2407 varpool_node::finalize_decl (hsa_img_descriptor);
2409 /* Construct the "host_table" libgomp expects. */
2410 tree index_type = build_index_type (build_int_cst (integer_type_node, 4));
2411 tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type);
2412 TYPE_ARTIFICIAL (libgomp_host_table_type) = 1;
2413 vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
2414 tree host_func_table_addr = build_fold_addr_expr (host_func_table);
2415 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2416 host_func_table_addr);
2417 offset_int func_table_size
2418 = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
2419 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2420 fold_build2 (POINTER_PLUS_EXPR,
2421 TREE_TYPE (host_func_table_addr),
2422 host_func_table_addr,
2423 build_int_cst (size_type_node,
2424 func_table_size.to_uhwi
2425 ())));
2426 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2427 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2428 tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
2429 libgomp_host_table_vec);
2430 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1);
2431 tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2432 get_identifier (tmp_name),
2433 libgomp_host_table_type);
2435 TREE_STATIC (hsa_libgomp_host_table) = 1;
2436 TREE_READONLY (hsa_libgomp_host_table) = 1;
2437 TREE_PUBLIC (hsa_libgomp_host_table) = 0;
2438 DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
2439 DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
2440 DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
2441 TREE_CONSTANT (hsa_libgomp_host_table) = 1;
2442 DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
2443 varpool_node::finalize_decl (hsa_libgomp_host_table);
2445 /* Generate an initializer with a call to the registration routine. */
2447 tree offload_register
2448 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER);
2449 gcc_checking_assert (offload_register);
2451 tree *hsa_ctor_stmts = hsa_get_ctor_statements ();
2452 append_to_statement_list
2453 (build_call_expr (offload_register, 4,
2454 build_int_cstu (unsigned_type_node,
2455 GOMP_VERSION_PACK (GOMP_VERSION,
2456 GOMP_VERSION_HSA)),
2457 build_fold_addr_expr (hsa_libgomp_host_table),
2458 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2459 build_fold_addr_expr (hsa_img_descriptor)),
2460 hsa_ctor_stmts);
2462 cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY);
2464 tree offload_unregister
2465 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER);
2466 gcc_checking_assert (offload_unregister);
2468 tree *hsa_dtor_stmts = hsa_get_dtor_statements ();
2469 append_to_statement_list
2470 (build_call_expr (offload_unregister, 4,
2471 build_int_cstu (unsigned_type_node,
2472 GOMP_VERSION_PACK (GOMP_VERSION,
2473 GOMP_VERSION_HSA)),
2474 build_fold_addr_expr (hsa_libgomp_host_table),
2475 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2476 build_fold_addr_expr (hsa_img_descriptor)),
2477 hsa_dtor_stmts);
2478 cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, DEFAULT_INIT_PRIORITY);
2481 /* Emit the brig module we have compiled to a section in the final assembly and
2482 also create a compile unit static constructor that will register the brig
2483 module with libgomp. */
2485 void
2486 hsa_output_brig (void)
2488 section *saved_section;
2490 if (!brig_initialized)
2491 return;
2493 for (unsigned i = 0; i < function_call_linkage.length (); i++)
2495 function_linkage_pair p = function_call_linkage[i];
2497 BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl);
2498 gcc_assert (*func_offset);
2499 BrigOperandCodeRef *code_ref
2500 = (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset));
2501 gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF);
2502 code_ref->ref = lendian32 (*func_offset);
2505 /* Iterate all function declarations and if we meet a function that should
2506 have module linkage and we are unable to emit HSAIL for the function,
2507 then change the linkage to program linkage. Doing so, we will emit
2508 a valid BRIG image. */
2509 if (hsa_failed_functions != NULL && emitted_declarations != NULL)
2510 for (hash_map <tree, BrigDirectiveExecutable *>::iterator it
2511 = emitted_declarations->begin ();
2512 it != emitted_declarations->end ();
2513 ++it)
2515 if (hsa_failed_functions->contains ((*it).first))
2516 (*it).second->linkage = BRIG_LINKAGE_PROGRAM;
2519 saved_section = in_section;
2521 switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
2522 char tmp_name[64];
2523 ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
2524 ASM_OUTPUT_LABEL (asm_out_file, tmp_name);
2525 tree brig_id = get_identifier (tmp_name);
2526 tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
2527 char_type_node);
2528 SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
2529 TREE_ADDRESSABLE (brig_decl) = 1;
2530 TREE_READONLY (brig_decl) = 1;
2531 DECL_ARTIFICIAL (brig_decl) = 1;
2532 DECL_IGNORED_P (brig_decl) = 1;
2533 TREE_STATIC (brig_decl) = 1;
2534 TREE_PUBLIC (brig_decl) = 0;
2535 TREE_USED (brig_decl) = 1;
2536 DECL_INITIAL (brig_decl) = brig_decl;
2537 TREE_ASM_WRITTEN (brig_decl) = 1;
2539 BrigModuleHeader module_header;
2540 memcpy (&module_header.identification, "HSA BRIG",
2541 sizeof (module_header.identification));
2542 module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR);
2543 module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR);
2544 uint64_t section_index[3];
2546 int data_padding, code_padding, operand_padding;
2547 data_padding = HSA_SECTION_ALIGNMENT
2548 - brig_data.total_size % HSA_SECTION_ALIGNMENT;
2549 code_padding = HSA_SECTION_ALIGNMENT
2550 - brig_code.total_size % HSA_SECTION_ALIGNMENT;
2551 operand_padding = HSA_SECTION_ALIGNMENT
2552 - brig_operand.total_size % HSA_SECTION_ALIGNMENT;
2554 uint64_t module_size = sizeof (module_header)
2555 + sizeof (section_index)
2556 + brig_data.total_size
2557 + data_padding
2558 + brig_code.total_size
2559 + code_padding
2560 + brig_operand.total_size
2561 + operand_padding;
2562 gcc_assert ((module_size % 16) == 0);
2563 module_header.byteCount = lendian64 (module_size);
2564 memset (&module_header.hash, 0, sizeof (module_header.hash));
2565 module_header.reserved = 0;
2566 module_header.sectionCount = lendian32 (3);
2567 module_header.sectionIndex = lendian64 (sizeof (module_header));
2568 assemble_string ((const char *) &module_header, sizeof (module_header));
2569 uint64_t off = sizeof (module_header) + sizeof (section_index);
2570 section_index[0] = lendian64 (off);
2571 off += brig_data.total_size + data_padding;
2572 section_index[1] = lendian64 (off);
2573 off += brig_code.total_size + code_padding;
2574 section_index[2] = lendian64 (off);
2575 assemble_string ((const char *) &section_index, sizeof (section_index));
2577 char padding[HSA_SECTION_ALIGNMENT];
2578 memset (padding, 0, sizeof (padding));
2580 brig_data.output ();
2581 assemble_string (padding, data_padding);
2582 brig_code.output ();
2583 assemble_string (padding, code_padding);
2584 brig_operand.output ();
2585 assemble_string (padding, operand_padding);
2587 if (saved_section)
2588 switch_to_section (saved_section);
2590 hsa_output_libgomp_mapping (brig_decl);
2592 hsa_free_decl_kernel_mapping ();
2593 brig_release_data ();
2594 hsa_deinit_compilation_unit_data ();
2596 delete emitted_declarations;
2597 emitted_declarations = NULL;
2598 delete function_offsets;
2599 function_offsets = NULL;