[AArch64] Use new target pass registration framework for FMA steering pass
[official-gcc.git] / gcc / hsa-brig.c
blob66ff8f97e07fa4ae11622ce4b03a6c9a94a4f0d7
1 /* Producing binary form of HSA BRIG from our internal representation.
2 Copyright (C) 2013-2016 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "target.h"
27 #include "memmodel.h"
28 #include "tm_p.h"
29 #include "is-a.h"
30 #include "vec.h"
31 #include "hash-table.h"
32 #include "hash-map.h"
33 #include "tree.h"
34 #include "tree-iterator.h"
35 #include "stor-layout.h"
36 #include "output.h"
37 #include "cfg.h"
38 #include "function.h"
39 #include "fold-const.h"
40 #include "stringpool.h"
41 #include "gimple-pretty-print.h"
42 #include "diagnostic-core.h"
43 #include "cgraph.h"
44 #include "dumpfile.h"
45 #include "print-tree.h"
46 #include "symbol-summary.h"
47 #include "hsa.h"
48 #include "gomp-constants.h"
/* Return the 16-bit VAL in little-endian byte order; the two bytes are
   exchanged only when the host is big-endian.  */

static uint16_t
lendian16 (uint16_t val)
{
#if GCC_VERSION >= 4008
  /* Rely on the compiler's byte-order macros and its bswap builtin.  */
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap16 (val);
# else
  /* Little-endian and PDP-endian hosts both return VAL unchanged.  */
  return val;
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Safe, slower default: exchange the bytes with shifts.  */
  return (val >> 8) | (val << 8);
#else
  return val;
#endif
}
/* Return the 32-bit VAL in little-endian byte order, reordering its bytes
   only when the host does not already store it that way.  */

static uint32_t
lendian32 (uint32_t val)
{
#if GCC_VERSION >= 4006
  /* Rely on the compiler's byte-order macros and its bswap builtin.  */
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap32 (val);
# elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
# else /* __ORDER_PDP_ENDIAN__ */
  /* Only the two 16-bit halves have to trade places.  */
  return (val >> 16) | (val << 16);
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Safe, slower default: swap the bytes within each half, then swap the
     halves.  */
  val = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8);
  return (val >> 16) | (val << 16);
#else
  return val;
#endif
}
/* Return the 64-bit VAL in little-endian byte order, reordering its bytes
   only when the host does not already store it that way.  */

static uint64_t
lendian64 (uint64_t val)
{
#if GCC_VERSION >= 4006
  /* Rely on the compiler's byte-order macros and its bswap builtin.  */
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap64 (val);
# elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
# else /* __ORDER_PDP_ENDIAN__ */
  /* Reverse the order of the four 16-bit words.  */
  return (((val & 0xffffll) << 48)
	  | ((val & 0xffff0000ll) << 16)
	  | ((val & 0xffff00000000ll) >> 16)
	  | ((val & 0xffff000000000000ll) >> 48));
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Safe, slower default: swap neighbouring bytes, then neighbouring 16-bit
     words, and finally the two 32-bit halves.  */
  val = (((val & 0xff00ff00ff00ff00ll) >> 8)
	 | ((val & 0x00ff00ff00ff00ffll) << 8));
  val = (((val & 0xffff0000ffff0000ll) >> 16)
	 | ((val & 0x0000ffff0000ffffll) << 16));
  return (val >> 32) | (val << 32);
#else
  return val;
#endif
}
/* Names used for the ELF section, the label and the individual BRIG
   sections.  */

#define BRIG_ELF_SECTION_NAME ".brig"
#define BRIG_LABEL_STRING "hsa_brig"
#define BRIG_SECTION_DATA_NAME "hsa_data"
#define BRIG_SECTION_CODE_NAME "hsa_code"
#define BRIG_SECTION_OPERAND_NAME "hsa_operand"

/* Number of bytes allocated for every chunk of BRIG data.  */

#define BRIG_CHUNK_MAX_SIZE (64 * 1024)

/* Required HSA section alignment.  */

#define HSA_SECTION_ALIGNMENT 16
/* One buffer of BRIG binary data.  */

struct hsa_brig_data_chunk
{
  /* Number of bytes of DATA that are already in use.  */
  unsigned size;

  /* The buffer itself.  */
  char *data;
};
150 /* Structure representing a BRIG section, holding and writing its data. */
152 class hsa_brig_section
154 public:
155 /* Section name that will be output to the BRIG. */
156 const char *section_name;
157 /* Size in bytes of all data stored in the section. */
158 unsigned total_size;
159 /* The size of the header of the section including padding. */
160 unsigned header_byte_count;
161 /* The size of the header of the section without any padding. */
162 unsigned header_byte_delta;
164 /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */
165 vec <struct hsa_brig_data_chunk> chunks;
167 /* More convenient access to the last chunk from the vector above. */
168 struct hsa_brig_data_chunk *cur_chunk;
170 void allocate_new_chunk ();
171 void init (const char *name);
172 void release ();
173 void output ();
174 unsigned add (const void *data, unsigned len);
175 void round_size_up (int factor);
176 void *get_ptr_by_offset (unsigned int offset);
179 static struct hsa_brig_section brig_data, brig_code, brig_operand;
180 static uint32_t brig_insn_count;
181 static bool brig_initialized = false;
183 /* Mapping between emitted HSA functions and their offset in code segment. */
184 static hash_map<tree, BrigCodeOffset32_t> *function_offsets;
186 /* Hash map of emitted function declarations. */
187 static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations;
189 /* Hash table of emitted internal function declaration offsets. */
190 hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls;
192 /* List of sbr instructions. */
193 static vec <hsa_insn_sbr *> *switch_instructions;
195 struct function_linkage_pair
197 function_linkage_pair (tree decl, unsigned int off)
198 : function_decl (decl), offset (off) {}
200 /* Declaration of called function. */
201 tree function_decl;
203 /* Offset in operand section. */
204 unsigned int offset;
207 /* Vector of function calls where we need to resolve function offsets. */
208 static auto_vec <function_linkage_pair> function_call_linkage;
210 /* Add a new chunk, allocate data for it and initialize it. */
212 void
213 hsa_brig_section::allocate_new_chunk ()
215 struct hsa_brig_data_chunk new_chunk;
217 new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE);
218 new_chunk.size = 0;
219 cur_chunk = chunks.safe_push (new_chunk);
222 /* Initialize the brig section. */
224 void
225 hsa_brig_section::init (const char *name)
227 section_name = name;
228 /* While the following computation is basically wrong, because the intent
229 certainly wasn't to have the first character of name and padding, which
230 are a part of sizeof (BrigSectionHeader), included in the first addend,
231 this is what the disassembler expects. */
232 total_size = sizeof (BrigSectionHeader) + strlen (section_name);
233 chunks.create (1);
234 allocate_new_chunk ();
235 header_byte_delta = total_size;
236 round_size_up (4);
237 header_byte_count = total_size;
240 /* Free all data in the section. */
242 void
243 hsa_brig_section::release ()
245 for (unsigned i = 0; i < chunks.length (); i++)
246 free (chunks[i].data);
247 chunks.release ();
248 cur_chunk = NULL;
251 /* Write the section to the output file to a section with the name given at
252 initialization. Switches the output section and does not restore it. */
254 void
255 hsa_brig_section::output ()
257 struct BrigSectionHeader section_header;
258 char padding[8];
260 section_header.byteCount = lendian64 (total_size);
261 section_header.headerByteCount = lendian32 (header_byte_count);
262 section_header.nameLength = lendian32 (strlen (section_name));
263 assemble_string ((const char *) &section_header, 16);
264 assemble_string (section_name, (section_header.nameLength));
265 memset (&padding, 0, sizeof (padding));
266 /* This is also a consequence of the wrong header size computation described
267 in a comment in hsa_brig_section::init. */
268 assemble_string (padding, 8);
269 for (unsigned i = 0; i < chunks.length (); i++)
270 assemble_string (chunks[i].data, chunks[i].size);
273 /* Add to the stream LEN bytes of opaque binary DATA. Return the offset at
274 which it was stored. */
276 unsigned
277 hsa_brig_section::add (const void *data, unsigned len)
279 unsigned offset = total_size;
281 gcc_assert (len <= BRIG_CHUNK_MAX_SIZE);
282 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len))
283 allocate_new_chunk ();
285 memcpy (cur_chunk->data + cur_chunk->size, data, len);
286 cur_chunk->size += len;
287 total_size += len;
289 return offset;
292 /* Add padding to section so that its size is divisible by FACTOR. */
294 void
295 hsa_brig_section::round_size_up (int factor)
297 unsigned padding, res = total_size % factor;
299 if (res == 0)
300 return;
302 padding = factor - res;
303 total_size += padding;
304 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding))
306 padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size;
307 cur_chunk->size = BRIG_CHUNK_MAX_SIZE;
308 allocate_new_chunk ();
311 cur_chunk->size += padding;
314 /* Return pointer to data by global OFFSET in the section. */
316 void *
317 hsa_brig_section::get_ptr_by_offset (unsigned int offset)
319 gcc_assert (offset < total_size);
320 offset -= header_byte_delta;
322 unsigned i;
323 for (i = 0; offset >= chunks[i].size; i++)
324 offset -= chunks[i].size;
326 return chunks[i].data + offset;
/* Entry of the hash table of emitted BRIG strings.  */

struct brig_string_slot
{
  /* Characters of the string; LEN of them are hashed and compared.  */
  const char *s;
  /* Prefix character stored together with the string.  */
  char prefix;
  /* Number of characters in S.  */
  int len;
  /* Offset recorded for the string.  */
  uint32_t offset;
};
339 /* Hash table helpers. */
341 struct brig_string_slot_hasher : pointer_hash <brig_string_slot>
343 static inline hashval_t hash (const value_type);
344 static inline bool equal (const value_type, const compare_type);
345 static inline void remove (value_type);
348 /* Returns a hash code for DS. Adapted from libiberty's htab_hash_string
349 to support strings that may not end in '\0'. */
351 inline hashval_t
352 brig_string_slot_hasher::hash (const value_type ds)
354 hashval_t r = ds->len;
355 int i;
357 for (i = 0; i < ds->len; i++)
358 r = r * 67 + (unsigned) ds->s[i] - 113;
359 r = r * 67 + (unsigned) ds->prefix - 113;
360 return r;
363 /* Returns nonzero if DS1 and DS2 are equal. */
365 inline bool
366 brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2)
368 if (ds1->len == ds2->len)
369 return ds1->prefix == ds2->prefix
370 && memcmp (ds1->s, ds2->s, ds1->len) == 0;
372 return 0;
375 /* Deallocate memory for DS upon its removal. */
377 inline void
378 brig_string_slot_hasher::remove (value_type ds)
380 free (const_cast<char *> (ds->s));
381 free (ds);
384 /* Hash for strings we output in order not to duplicate them needlessly. */
386 static hash_table<brig_string_slot_hasher> *brig_string_htab;
388 /* Emit a null terminated string STR to the data section and return its
389 offset in it. If PREFIX is non-zero, output it just before STR too.
390 Sanitize the string if SANITIZE option is set to true. */
392 static unsigned
393 brig_emit_string (const char *str, char prefix = 0, bool sanitize = true)
395 unsigned slen = strlen (str);
396 unsigned offset, len = slen + (prefix ? 1 : 0);
397 uint32_t hdr_len = lendian32 (len);
398 brig_string_slot s_slot;
399 brig_string_slot **slot;
400 char *str2;
402 str2 = xstrdup (str);
404 if (sanitize)
405 hsa_sanitize_name (str2);
406 s_slot.s = str2;
407 s_slot.len = slen;
408 s_slot.prefix = prefix;
409 s_slot.offset = 0;
411 slot = brig_string_htab->find_slot (&s_slot, INSERT);
412 if (*slot == NULL)
414 brig_string_slot *new_slot = XCNEW (brig_string_slot);
416 /* In theory we should fill in BrigData but that would mean copying
417 the string to a buffer for no reason, so we just emulate it. */
418 offset = brig_data.add (&hdr_len, sizeof (hdr_len));
419 if (prefix)
420 brig_data.add (&prefix, 1);
422 brig_data.add (str2, slen);
423 brig_data.round_size_up (4);
425 /* TODO: could use the string we just copied into
426 brig_string->cur_chunk */
427 new_slot->s = str2;
428 new_slot->len = slen;
429 new_slot->prefix = prefix;
430 new_slot->offset = offset;
431 *slot = new_slot;
433 else
435 offset = (*slot)->offset;
436 free (str2);
439 return offset;
442 /* Linked list of queued operands. */
444 static struct operand_queue
446 /* First from the chain of queued operands. */
447 hsa_op_base *first_op, *last_op;
449 /* The offset at which the next operand will be enqueued. */
450 unsigned projected_size;
452 } op_queue;
454 /* Unless already initialized, initialize infrastructure to produce BRIG. */
456 static void
457 brig_init (void)
459 brig_insn_count = 0;
461 if (brig_initialized)
462 return;
464 brig_string_htab = new hash_table<brig_string_slot_hasher> (37);
465 brig_data.init (BRIG_SECTION_DATA_NAME);
466 brig_code.init (BRIG_SECTION_CODE_NAME);
467 brig_operand.init (BRIG_SECTION_OPERAND_NAME);
468 brig_initialized = true;
470 struct BrigDirectiveModule moddir;
471 memset (&moddir, 0, sizeof (moddir));
472 moddir.base.byteCount = lendian16 (sizeof (moddir));
474 char *modname;
475 if (main_input_filename && *main_input_filename != '\0')
477 const char *part = strrchr (main_input_filename, '/');
478 if (!part)
479 part = main_input_filename;
480 else
481 part++;
482 modname = concat ("&__hsa_module_", part, NULL);
483 char *extension = strchr (modname, '.');
484 if (extension)
485 *extension = '\0';
487 /* As in LTO mode, we have to emit a different module names. */
488 if (flag_ltrans)
490 part = strrchr (asm_file_name, '/');
491 if (!part)
492 part = asm_file_name;
493 else
494 part++;
495 char *modname2;
496 asprintf (&modname2, "%s_%s", modname, part);
497 free (modname);
498 modname = modname2;
501 hsa_sanitize_name (modname);
502 moddir.name = brig_emit_string (modname);
503 free (modname);
505 else
506 moddir.name = brig_emit_string ("__hsa_module_unnamed", '&');
507 moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE);
508 moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR);
509 moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR);
510 moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
511 if (hsa_machine_large_p ())
512 moddir.machineModel = BRIG_MACHINE_LARGE;
513 else
514 moddir.machineModel = BRIG_MACHINE_SMALL;
515 moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
516 brig_code.add (&moddir, sizeof (moddir));
519 /* Free all BRIG data. */
521 static void
522 brig_release_data (void)
524 delete brig_string_htab;
525 brig_data.release ();
526 brig_code.release ();
527 brig_operand.release ();
529 brig_initialized = 0;
532 /* Enqueue operation OP. Return the offset at which it will be stored. */
534 static unsigned int
535 enqueue_op (hsa_op_base *op)
537 unsigned ret;
539 if (op->m_brig_op_offset)
540 return op->m_brig_op_offset;
542 ret = op_queue.projected_size;
543 op->m_brig_op_offset = op_queue.projected_size;
545 if (!op_queue.first_op)
546 op_queue.first_op = op;
547 else
548 op_queue.last_op->m_next = op;
549 op_queue.last_op = op;
551 if (is_a <hsa_op_immed *> (op))
552 op_queue.projected_size += sizeof (struct BrigOperandConstantBytes);
553 else if (is_a <hsa_op_reg *> (op))
554 op_queue.projected_size += sizeof (struct BrigOperandRegister);
555 else if (is_a <hsa_op_address *> (op))
556 op_queue.projected_size += sizeof (struct BrigOperandAddress);
557 else if (is_a <hsa_op_code_ref *> (op))
558 op_queue.projected_size += sizeof (struct BrigOperandCodeRef);
559 else if (is_a <hsa_op_code_list *> (op))
560 op_queue.projected_size += sizeof (struct BrigOperandCodeList);
561 else if (is_a <hsa_op_operand_list *> (op))
562 op_queue.projected_size += sizeof (struct BrigOperandOperandList);
563 else
564 gcc_unreachable ();
565 return ret;
569 /* Emit directive describing a symbol if it has not been emitted already.
570 Return the offset of the directive. */
572 static unsigned
573 emit_directive_variable (struct hsa_symbol *symbol)
575 struct BrigDirectiveVariable dirvar;
576 unsigned name_offset;
577 static unsigned res_name_offset;
579 if (symbol->m_directive_offset)
580 return symbol->m_directive_offset;
582 memset (&dirvar, 0, sizeof (dirvar));
583 dirvar.base.byteCount = lendian16 (sizeof (dirvar));
584 dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE);
585 dirvar.allocation = symbol->m_allocation;
587 char prefix = symbol->m_global_scope_p ? '&' : '%';
589 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL)
591 if (res_name_offset == 0)
592 res_name_offset = brig_emit_string (symbol->m_name, '%');
593 name_offset = res_name_offset;
595 else if (symbol->m_name)
596 name_offset = brig_emit_string (symbol->m_name, prefix);
597 else
599 char buf[64];
600 snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment),
601 symbol->m_name_number);
602 name_offset = brig_emit_string (buf, prefix);
605 dirvar.name = lendian32 (name_offset);
606 dirvar.init = 0;
607 dirvar.type = lendian16 (symbol->m_type);
608 dirvar.segment = symbol->m_segment;
609 dirvar.align = symbol->m_align;
610 dirvar.linkage = symbol->m_linkage;
611 dirvar.dim.lo = symbol->m_dim;
612 dirvar.dim.hi = symbol->m_dim >> 32;
614 /* Global variables are just declared and linked via HSA runtime. */
615 if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM)
616 dirvar.modifier |= BRIG_VARIABLE_DEFINITION;
617 dirvar.reserved = 0;
619 if (symbol->m_cst_value)
621 dirvar.modifier |= BRIG_VARIABLE_CONST;
622 dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value));
625 symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar));
626 return symbol->m_directive_offset;
629 /* Emit directives describing either a function declaration or
630 definition F. */
632 static BrigDirectiveExecutable *
633 emit_function_directives (hsa_function_representation *f, bool is_declaration)
635 struct BrigDirectiveExecutable fndir;
636 unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
637 int count = 0;
638 BrigDirectiveExecutable *ptr_to_fndir;
639 hsa_symbol *sym;
641 if (!f->m_declaration_p)
642 for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++)
644 gcc_assert (!sym->m_emitted_to_brig);
645 sym->m_emitted_to_brig = true;
646 emit_directive_variable (sym);
647 brig_insn_count++;
650 name_offset = brig_emit_string (f->m_name, '&');
651 inarg_off = brig_code.total_size + sizeof (fndir)
652 + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
653 scoped_off = inarg_off
654 + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable);
656 if (!f->m_declaration_p)
658 count += f->m_spill_symbols.length ();
659 count += f->m_private_variables.length ();
662 next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable);
664 memset (&fndir, 0, sizeof (fndir));
665 fndir.base.byteCount = lendian16 (sizeof (fndir));
666 fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
667 : BRIG_KIND_DIRECTIVE_FUNCTION);
668 fndir.name = lendian32 (name_offset);
669 fndir.inArgCount = lendian16 (f->m_input_args.length ());
670 fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0);
671 fndir.firstInArg = lendian32 (inarg_off);
672 fndir.firstCodeBlockEntry = lendian32 (scoped_off);
673 fndir.nextModuleEntry = lendian32 (next_toplev_off);
674 fndir.linkage = f->get_linkage ();
675 if (!f->m_declaration_p)
676 fndir.modifier |= BRIG_EXECUTABLE_DEFINITION;
677 memset (&fndir.reserved, 0, sizeof (fndir.reserved));
679 /* Once we put a definition of function_offsets, we should not overwrite
680 it with a declaration of the function. */
681 if (f->m_internal_fn == NULL)
683 if (!function_offsets->get (f->m_decl) || !is_declaration)
684 function_offsets->put (f->m_decl, brig_code.total_size);
686 else
688 /* Internal function. */
689 hsa_internal_fn **slot
690 = hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT);
691 hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn);
692 int_fn->m_offset = brig_code.total_size;
693 *slot = int_fn;
696 brig_code.add (&fndir, sizeof (fndir));
697 /* terrible hack: we need to set instCount after we emit all
698 insns, but we need to emit directive in order, and we emit directives
699 during insn emitting. So we need to emit the FUNCTION directive
700 early, then the insns, and then we need to set instCount, so remember
701 a pointer to it, in some horrible way. cur_chunk.data+size points
702 directly to after fndir here. */
703 ptr_to_fndir
704 = (BrigDirectiveExecutable *)(brig_code.cur_chunk->data
705 + brig_code.cur_chunk->size
706 - sizeof (fndir));
708 if (f->m_output_arg)
709 emit_directive_variable (f->m_output_arg);
710 for (unsigned i = 0; i < f->m_input_args.length (); i++)
711 emit_directive_variable (f->m_input_args[i]);
713 if (!f->m_declaration_p)
715 for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++)
717 emit_directive_variable (sym);
718 brig_insn_count++;
720 for (unsigned i = 0; i < f->m_private_variables.length (); i++)
722 emit_directive_variable (f->m_private_variables[i]);
723 brig_insn_count++;
727 return ptr_to_fndir;
730 /* Emit a label directive for the given HBB. We assume it is about to start on
731 the current offset in the code section. */
733 static void
734 emit_bb_label_directive (hsa_bb *hbb)
736 struct BrigDirectiveLabel lbldir;
738 lbldir.base.byteCount = lendian16 (sizeof (lbldir));
739 lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL);
740 char buf[32];
741 snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl),
742 hbb->m_index);
743 lbldir.name = lendian32 (brig_emit_string (buf, '@'));
745 hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir,
746 sizeof (lbldir));
747 brig_insn_count++;
750 /* Map a normal HSAIL type to the type of the equivalent BRIG operand
751 holding such, for constants and registers. */
753 static BrigType16_t
754 regtype_for_type (BrigType16_t t)
756 switch (t)
758 case BRIG_TYPE_B1:
759 return BRIG_TYPE_B1;
761 case BRIG_TYPE_U8:
762 case BRIG_TYPE_U16:
763 case BRIG_TYPE_U32:
764 case BRIG_TYPE_S8:
765 case BRIG_TYPE_S16:
766 case BRIG_TYPE_S32:
767 case BRIG_TYPE_B8:
768 case BRIG_TYPE_B16:
769 case BRIG_TYPE_B32:
770 case BRIG_TYPE_F16:
771 case BRIG_TYPE_F32:
772 case BRIG_TYPE_U8X4:
773 case BRIG_TYPE_U16X2:
774 case BRIG_TYPE_S8X4:
775 case BRIG_TYPE_S16X2:
776 case BRIG_TYPE_F16X2:
777 return BRIG_TYPE_B32;
779 case BRIG_TYPE_U64:
780 case BRIG_TYPE_S64:
781 case BRIG_TYPE_F64:
782 case BRIG_TYPE_B64:
783 case BRIG_TYPE_U8X8:
784 case BRIG_TYPE_U16X4:
785 case BRIG_TYPE_U32X2:
786 case BRIG_TYPE_S8X8:
787 case BRIG_TYPE_S16X4:
788 case BRIG_TYPE_S32X2:
789 case BRIG_TYPE_F16X4:
790 case BRIG_TYPE_F32X2:
791 return BRIG_TYPE_B64;
793 case BRIG_TYPE_B128:
794 case BRIG_TYPE_U8X16:
795 case BRIG_TYPE_U16X8:
796 case BRIG_TYPE_U32X4:
797 case BRIG_TYPE_U64X2:
798 case BRIG_TYPE_S8X16:
799 case BRIG_TYPE_S16X8:
800 case BRIG_TYPE_S32X4:
801 case BRIG_TYPE_S64X2:
802 case BRIG_TYPE_F16X8:
803 case BRIG_TYPE_F32X4:
804 case BRIG_TYPE_F64X2:
805 return BRIG_TYPE_B128;
807 default:
808 gcc_unreachable ();
812 /* Return the length of the BRIG type TYPE that is going to be streamed out as
813 an immediate constant (so it must not be B1). */
815 unsigned
816 hsa_get_imm_brig_type_len (BrigType16_t type)
818 BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
819 BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;
821 switch (pack_type)
823 case BRIG_TYPE_PACK_NONE:
824 break;
825 case BRIG_TYPE_PACK_32:
826 return 4;
827 case BRIG_TYPE_PACK_64:
828 return 8;
829 case BRIG_TYPE_PACK_128:
830 return 16;
831 default:
832 gcc_unreachable ();
835 switch (base_type)
837 case BRIG_TYPE_U8:
838 case BRIG_TYPE_S8:
839 case BRIG_TYPE_B8:
840 return 1;
841 case BRIG_TYPE_U16:
842 case BRIG_TYPE_S16:
843 case BRIG_TYPE_F16:
844 case BRIG_TYPE_B16:
845 return 2;
846 case BRIG_TYPE_U32:
847 case BRIG_TYPE_S32:
848 case BRIG_TYPE_F32:
849 case BRIG_TYPE_B32:
850 return 4;
851 case BRIG_TYPE_U64:
852 case BRIG_TYPE_S64:
853 case BRIG_TYPE_F64:
854 case BRIG_TYPE_B64:
855 return 8;
856 case BRIG_TYPE_B128:
857 return 16;
858 default:
859 gcc_unreachable ();
863 /* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
864 If NEED_LEN is not equal to zero, shrink or extend the value
865 to NEED_LEN bytes. Return how many bytes were written. */
867 static int
868 emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len)
870 union hsa_bytes bytes;
872 memset (&bytes, 0, sizeof (bytes));
873 tree type = TREE_TYPE (value);
874 gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);
876 unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
877 if (INTEGRAL_TYPE_P (type)
878 || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST))
879 switch (data_len)
881 case 1:
882 bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
883 break;
884 case 2:
885 bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
886 break;
887 case 4:
888 bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
889 break;
890 case 8:
891 bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value);
892 break;
893 default:
894 gcc_unreachable ();
896 else if (SCALAR_FLOAT_TYPE_P (type))
898 if (data_len == 2)
900 sorry ("Support for HSA does not implement immediate 16 bit FPU "
901 "operands");
902 return 2;
904 unsigned int_len = GET_MODE_SIZE (TYPE_MODE (type));
905 /* There are always 32 bits in each long, no matter the size of
906 the hosts long. */
907 long tmp[6];
909 real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));
911 if (int_len == 4)
912 bytes.b32 = (uint32_t) tmp[0];
913 else
915 bytes.b64 = (uint64_t)(uint32_t) tmp[1];
916 bytes.b64 <<= 32;
917 bytes.b64 |= (uint32_t) tmp[0];
920 else
921 gcc_unreachable ();
923 int len;
924 if (need_len == 0)
925 len = data_len;
926 else
927 len = need_len;
929 memcpy (data, &bytes, len);
930 return len;
933 char *
934 hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size)
936 char *brig_repr;
937 *brig_repr_size = hsa_get_imm_brig_type_len (m_type);
939 if (m_tree_value != NULL_TREE)
941 /* Update brig_repr_size for special tree values. */
942 if (TREE_CODE (m_tree_value) == STRING_CST)
943 *brig_repr_size = TREE_STRING_LENGTH (m_tree_value);
944 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
945 *brig_repr_size
946 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value)));
948 unsigned total_len = *brig_repr_size;
950 /* As we can have a constructor with fewer elements, fill the memory
951 with zeros. */
952 brig_repr = XCNEWVEC (char, total_len);
953 char *p = brig_repr;
955 if (TREE_CODE (m_tree_value) == VECTOR_CST)
957 int i, num = VECTOR_CST_NELTS (m_tree_value);
958 for (i = 0; i < num; i++)
960 tree v = VECTOR_CST_ELT (m_tree_value, i);
961 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
962 total_len -= actual;
963 p += actual;
965 /* Vectors should have the exact size. */
966 gcc_assert (total_len == 0);
968 else if (TREE_CODE (m_tree_value) == STRING_CST)
969 memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value),
970 TREE_STRING_LENGTH (m_tree_value));
971 else if (TREE_CODE (m_tree_value) == COMPLEX_CST)
973 gcc_assert (total_len % 2 == 0);
974 unsigned actual;
975 actual
976 = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p,
977 total_len / 2);
979 gcc_assert (actual == total_len / 2);
980 p += actual;
982 actual
983 = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p,
984 total_len / 2);
985 gcc_assert (actual == total_len / 2);
987 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
989 unsigned len = CONSTRUCTOR_NELTS (m_tree_value);
990 for (unsigned i = 0; i < len; i++)
992 tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
993 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
994 total_len -= actual;
995 p += actual;
998 else
999 emit_immediate_scalar_to_buffer (m_tree_value, p, total_len);
1001 else
1003 hsa_bytes bytes;
1005 switch (*brig_repr_size)
1007 case 1:
1008 bytes.b8 = (uint8_t) m_int_value;
1009 break;
1010 case 2:
1011 bytes.b16 = (uint16_t) m_int_value;
1012 break;
1013 case 4:
1014 bytes.b32 = (uint32_t) m_int_value;
1015 break;
1016 case 8:
1017 bytes.b64 = (uint64_t) m_int_value;
1018 break;
1019 default:
1020 gcc_unreachable ();
1023 brig_repr = XNEWVEC (char, *brig_repr_size);
1024 memcpy (brig_repr, &bytes, *brig_repr_size);
1027 return brig_repr;
1030 /* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might
1031 have been massaged to comply with various HSA/BRIG type requirements, so the
1032 only important aspect of that is the length (because HSAIL might expect
1033 smaller constants or become bit-data). The data should be represented
1034 according to what is in the tree representation. */
1036 static void
1037 emit_immediate_operand (hsa_op_immed *imm)
1039 unsigned brig_repr_size;
1040 char *brig_repr = imm->emit_to_buffer (&brig_repr_size);
1041 struct BrigOperandConstantBytes out;
1043 memset (&out, 0, sizeof (out));
1044 out.base.byteCount = lendian16 (sizeof (out));
1045 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
1046 uint32_t byteCount = lendian32 (brig_repr_size);
1047 out.type = lendian16 (imm->m_type);
1048 out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1049 brig_operand.add (&out, sizeof (out));
1050 brig_data.add (brig_repr, brig_repr_size);
1051 brig_data.round_size_up (4);
1053 free (brig_repr);
1056 /* Emit a register BRIG operand REG. */
1058 static void
1059 emit_register_operand (hsa_op_reg *reg)
1061 struct BrigOperandRegister out;
1063 out.base.byteCount = lendian16 (sizeof (out));
1064 out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER);
1065 out.regNum = lendian32 (reg->m_hard_num);
1067 switch (regtype_for_type (reg->m_type))
1069 case BRIG_TYPE_B32:
1070 out.regKind = BRIG_REGISTER_KIND_SINGLE;
1071 break;
1072 case BRIG_TYPE_B64:
1073 out.regKind = BRIG_REGISTER_KIND_DOUBLE;
1074 break;
1075 case BRIG_TYPE_B128:
1076 out.regKind = BRIG_REGISTER_KIND_QUAD;
1077 break;
1078 case BRIG_TYPE_B1:
1079 out.regKind = BRIG_REGISTER_KIND_CONTROL;
1080 break;
1081 default:
1082 gcc_unreachable ();
1085 brig_operand.add (&out, sizeof (out));
1088 /* Emit an address BRIG operand ADDR. */
1090 static void
1091 emit_address_operand (hsa_op_address *addr)
1093 struct BrigOperandAddress out;
1095 out.base.byteCount = lendian16 (sizeof (out));
1096 out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS);
1097 out.symbol = addr->m_symbol
1098 ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0;
1099 out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0;
1101 if (sizeof (addr->m_imm_offset) == 8)
1103 out.offset.lo = lendian32 (addr->m_imm_offset);
1104 out.offset.hi = lendian32 (addr->m_imm_offset >> 32);
1106 else
1108 gcc_assert (sizeof (addr->m_imm_offset) == 4);
1109 out.offset.lo = lendian32 (addr->m_imm_offset);
1110 out.offset.hi = 0;
1113 brig_operand.add (&out, sizeof (out));
1116 /* Emit a code reference operand REF. */
1118 static void
1119 emit_code_ref_operand (hsa_op_code_ref *ref)
1121 struct BrigOperandCodeRef out;
1123 out.base.byteCount = lendian16 (sizeof (out));
1124 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF);
1125 out.ref = lendian32 (ref->m_directive_offset);
1126 brig_operand.add (&out, sizeof (out));
1129 /* Emit a code list operand CODE_LIST. */
1131 static void
1132 emit_code_list_operand (hsa_op_code_list *code_list)
1134 struct BrigOperandCodeList out;
1135 unsigned args = code_list->m_offsets.length ();
1137 for (unsigned i = 0; i < args; i++)
1138 gcc_assert (code_list->m_offsets[i]);
1140 out.base.byteCount = lendian16 (sizeof (out));
1141 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST);
1143 uint32_t byteCount = lendian32 (4 * args);
1145 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1146 brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t));
1147 brig_data.round_size_up (4);
1148 brig_operand.add (&out, sizeof (out));
1151 /* Emit an operand list operand OPERAND_LIST. */
1153 static void
1154 emit_operand_list_operand (hsa_op_operand_list *operand_list)
1156 struct BrigOperandOperandList out;
1157 unsigned args = operand_list->m_offsets.length ();
1159 for (unsigned i = 0; i < args; i++)
1160 gcc_assert (operand_list->m_offsets[i]);
1162 out.base.byteCount = lendian16 (sizeof (out));
1163 out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST);
1165 uint32_t byteCount = lendian32 (4 * args);
1167 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1168 brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t));
1169 brig_data.round_size_up (4);
1170 brig_operand.add (&out, sizeof (out));
1173 /* Emit all operands queued for writing. */
1175 static void
1176 emit_queued_operands (void)
1178 for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next)
1180 gcc_assert (op->m_brig_op_offset == brig_operand.total_size);
1181 if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op))
1182 emit_immediate_operand (imm);
1183 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
1184 emit_register_operand (reg);
1185 else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op))
1186 emit_address_operand (addr);
1187 else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op))
1188 emit_code_ref_operand (ref);
1189 else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op))
1190 emit_code_list_operand (code_list);
1191 else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op))
1192 emit_operand_list_operand (l);
1193 else
1194 gcc_unreachable ();
1198 /* Emit directives describing the function that is used for
1199 a function declaration. */
1201 static BrigDirectiveExecutable *
1202 emit_function_declaration (tree decl)
1204 hsa_function_representation *f = hsa_generate_function_declaration (decl);
1206 BrigDirectiveExecutable *e = emit_function_directives (f, true);
1207 emit_queued_operands ();
1209 delete f;
1211 return e;
1214 /* Emit directives describing the function that is used for
1215 an internal function declaration. */
1217 static BrigDirectiveExecutable *
1218 emit_internal_fn_decl (hsa_internal_fn *fn)
1220 hsa_function_representation *f = hsa_generate_internal_fn_decl (fn);
1222 BrigDirectiveExecutable *e = emit_function_directives (f, true);
1223 emit_queued_operands ();
1225 delete f;
1227 return e;
1230 /* Enqueue all operands of INSN and return offset to BRIG data section
1231 to list of operand offsets. */
1233 static unsigned
1234 emit_insn_operands (hsa_insn_basic *insn)
1236 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1237 operand_offsets;
1239 unsigned l = insn->operand_count ();
1240 operand_offsets.safe_grow (l);
1242 for (unsigned i = 0; i < l; i++)
1243 operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i)));
1245 /* We have N operands so use 4 * N for the byte_count. */
1246 uint32_t byte_count = lendian32 (4 * l);
1248 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1249 brig_data.add (operand_offsets.address (),
1250 l * sizeof (BrigOperandOffset32_t));
1252 brig_data.round_size_up (4);
1254 return offset;
1257 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
1258 to BRIG data section to list of operand offsets. */
1260 static unsigned
1261 emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL,
1262 hsa_op_base *op2 = NULL)
1264 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1265 operand_offsets;
1267 gcc_checking_assert (op0 != NULL);
1268 operand_offsets.safe_push (enqueue_op (op0));
1270 if (op1 != NULL)
1272 operand_offsets.safe_push (enqueue_op (op1));
1273 if (op2 != NULL)
1274 operand_offsets.safe_push (enqueue_op (op2));
1277 unsigned l = operand_offsets.length ();
1279 /* We have N operands so use 4 * N for the byte_count. */
1280 uint32_t byte_count = lendian32 (4 * l);
1282 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1283 brig_data.add (operand_offsets.address (),
1284 l * sizeof (BrigOperandOffset32_t));
1286 brig_data.round_size_up (4);
1288 return offset;
1291 /* Emit an HSA memory instruction and all necessary directives, schedule
1292 necessary operands for writing. */
1294 static void
1295 emit_memory_insn (hsa_insn_mem *mem)
1297 struct BrigInstMem repr;
1298 gcc_checking_assert (mem->operand_count () == 2);
1300 hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1));
1302 /* This is necessary because of the erroneous typedef of
1303 BrigMemoryModifier8_t which introduces padding which may then contain
1304 random stuff (which we do not want so that we can test things don't
1305 change). */
1306 memset (&repr, 0, sizeof (repr));
1307 repr.base.base.byteCount = lendian16 (sizeof (repr));
1308 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1309 repr.base.opcode = lendian16 (mem->m_opcode);
1310 repr.base.type = lendian16 (mem->m_type);
1311 repr.base.operands = lendian32 (emit_insn_operands (mem));
1313 if (addr->m_symbol)
1314 repr.segment = addr->m_symbol->m_segment;
1315 else
1316 repr.segment = BRIG_SEGMENT_FLAT;
1317 repr.modifier = 0;
1318 repr.equivClass = mem->m_equiv_class;
1319 repr.align = mem->m_align;
1320 if (mem->m_opcode == BRIG_OPCODE_LD)
1321 repr.width = BRIG_WIDTH_1;
1322 else
1323 repr.width = BRIG_WIDTH_NONE;
1324 memset (&repr.reserved, 0, sizeof (repr.reserved));
1325 brig_code.add (&repr, sizeof (repr));
1326 brig_insn_count++;
1329 /* Emit an HSA signal memory instruction and all necessary directives, schedule
1330 necessary operands for writing. */
1332 static void
1333 emit_signal_insn (hsa_insn_signal *mem)
1335 struct BrigInstSignal repr;
1337 /* This is necessary because of the erroneous typedef of
1338 BrigMemoryModifier8_t which introduces padding which may then contain
1339 random stuff (which we do not want so that we can test things don't
1340 change). */
1341 memset (&repr, 0, sizeof (repr));
1342 repr.base.base.byteCount = lendian16 (sizeof (repr));
1343 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL);
1344 repr.base.opcode = lendian16 (mem->m_opcode);
1345 repr.base.type = lendian16 (mem->m_type);
1346 repr.base.operands = lendian32 (emit_insn_operands (mem));
1348 repr.memoryOrder = mem->m_memoryorder;
1349 repr.signalOperation = mem->m_atomicop;
1350 repr.signalType = BRIG_TYPE_SIG64;
1352 brig_code.add (&repr, sizeof (repr));
1353 brig_insn_count++;
1356 /* Emit an HSA atomic memory instruction and all necessary directives, schedule
1357 necessary operands for writing. */
1359 static void
1360 emit_atomic_insn (hsa_insn_atomic *mem)
1362 struct BrigInstAtomic repr;
1364 /* Either operand[0] or operand[1] must be an address operand. */
1365 hsa_op_address *addr = NULL;
1366 if (is_a <hsa_op_address *> (mem->get_op (0)))
1367 addr = as_a <hsa_op_address *> (mem->get_op (0));
1368 else
1369 addr = as_a <hsa_op_address *> (mem->get_op (1));
1371 /* This is necessary because of the erroneous typedef of
1372 BrigMemoryModifier8_t which introduces padding which may then contain
1373 random stuff (which we do not want so that we can test things don't
1374 change). */
1375 memset (&repr, 0, sizeof (repr));
1376 repr.base.base.byteCount = lendian16 (sizeof (repr));
1377 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC);
1378 repr.base.opcode = lendian16 (mem->m_opcode);
1379 repr.base.type = lendian16 (mem->m_type);
1380 repr.base.operands = lendian32 (emit_insn_operands (mem));
1382 if (addr->m_symbol)
1383 repr.segment = addr->m_symbol->m_segment;
1384 else
1385 repr.segment = BRIG_SEGMENT_FLAT;
1386 repr.memoryOrder = mem->m_memoryorder;
1387 repr.memoryScope = mem->m_memoryscope;
1388 repr.atomicOperation = mem->m_atomicop;
1390 brig_code.add (&repr, sizeof (repr));
1391 brig_insn_count++;
1394 /* Emit an HSA LDA instruction and all necessary directives, schedule
1395 necessary operands for writing. */
1397 static void
1398 emit_addr_insn (hsa_insn_basic *insn)
1400 struct BrigInstAddr repr;
1402 hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1));
1404 repr.base.base.byteCount = lendian16 (sizeof (repr));
1405 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR);
1406 repr.base.opcode = lendian16 (insn->m_opcode);
1407 repr.base.type = lendian16 (insn->m_type);
1408 repr.base.operands = lendian32 (emit_insn_operands (insn));
1410 if (addr->m_symbol)
1411 repr.segment = addr->m_symbol->m_segment;
1412 else
1413 repr.segment = BRIG_SEGMENT_FLAT;
1414 memset (&repr.reserved, 0, sizeof (repr.reserved));
1416 brig_code.add (&repr, sizeof (repr));
1417 brig_insn_count++;
1420 /* Emit an HSA segment conversion instruction and all necessary directives,
1421 schedule necessary operands for writing. */
1423 static void
1424 emit_segment_insn (hsa_insn_seg *seg)
1426 struct BrigInstSegCvt repr;
1428 repr.base.base.byteCount = lendian16 (sizeof (repr));
1429 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT);
1430 repr.base.opcode = lendian16 (seg->m_opcode);
1431 repr.base.type = lendian16 (seg->m_type);
1432 repr.base.operands = lendian32 (emit_insn_operands (seg));
1433 repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type);
1434 repr.segment = seg->m_segment;
1435 repr.modifier = 0;
1437 brig_code.add (&repr, sizeof (repr));
1439 brig_insn_count++;
1442 /* Emit an HSA alloca instruction and all necessary directives,
1443 schedule necessary operands for writing. */
1445 static void
1446 emit_alloca_insn (hsa_insn_alloca *alloca)
1448 struct BrigInstMem repr;
1449 gcc_checking_assert (alloca->operand_count () == 2);
1451 /* This is necessary because of the erroneous typedef of
1452 BrigMemoryModifier8_t which introduces padding which may then contain
1453 random stuff (which we do not want so that we can test things don't
1454 change). */
1455 memset (&repr, 0, sizeof (repr));
1456 repr.base.base.byteCount = lendian16 (sizeof (repr));
1457 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1458 repr.base.opcode = lendian16 (alloca->m_opcode);
1459 repr.base.type = lendian16 (alloca->m_type);
1460 repr.base.operands = lendian32 (emit_insn_operands (alloca));
1461 repr.segment = BRIG_SEGMENT_PRIVATE;
1462 repr.modifier = 0;
1463 repr.equivClass = 0;
1464 repr.align = alloca->m_align;
1465 repr.width = BRIG_WIDTH_NONE;
1466 memset (&repr.reserved, 0, sizeof (repr.reserved));
1467 brig_code.add (&repr, sizeof (repr));
1468 brig_insn_count++;
1471 /* Emit an HSA comparison instruction and all necessary directives,
1472 schedule necessary operands for writing. */
1474 static void
1475 emit_cmp_insn (hsa_insn_cmp *cmp)
1477 struct BrigInstCmp repr;
1479 memset (&repr, 0, sizeof (repr));
1480 repr.base.base.byteCount = lendian16 (sizeof (repr));
1481 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP);
1482 repr.base.opcode = lendian16 (cmp->m_opcode);
1483 repr.base.type = lendian16 (cmp->m_type);
1484 repr.base.operands = lendian32 (emit_insn_operands (cmp));
1486 if (is_a <hsa_op_reg *> (cmp->get_op (1)))
1487 repr.sourceType
1488 = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type);
1489 else
1490 repr.sourceType
1491 = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type);
1492 repr.modifier = 0;
1493 repr.compare = cmp->m_compare;
1494 repr.pack = 0;
1496 brig_code.add (&repr, sizeof (repr));
1497 brig_insn_count++;
1500 /* Emit an HSA branching instruction and all necessary directives, schedule
1501 necessary operands for writing. */
1503 static void
1504 emit_branch_insn (hsa_insn_br *br)
1506 struct BrigInstBr repr;
1508 basic_block target = NULL;
1509 edge_iterator ei;
1510 edge e;
1512 /* At the moment we only handle direct conditional jumps. */
1513 gcc_assert (br->m_opcode == BRIG_OPCODE_CBR);
1514 repr.base.base.byteCount = lendian16 (sizeof (repr));
1515 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1516 repr.base.opcode = lendian16 (br->m_opcode);
1517 repr.width = BRIG_WIDTH_1;
1518 /* For Conditional jumps the type is always B1. */
1519 repr.base.type = lendian16 (BRIG_TYPE_B1);
1521 FOR_EACH_EDGE (e, ei, br->m_bb->succs)
1522 if (e->flags & EDGE_TRUE_VALUE)
1524 target = e->dest;
1525 break;
1527 gcc_assert (target);
1529 repr.base.operands
1530 = lendian32 (emit_operands (br->get_op (0),
1531 &hsa_bb_for_bb (target)->m_label_ref));
1532 memset (&repr.reserved, 0, sizeof (repr.reserved));
1534 brig_code.add (&repr, sizeof (repr));
1535 brig_insn_count++;
1538 /* Emit an HSA unconditional jump branching instruction that points to
1539 a label REFERENCE. */
1541 static void
1542 emit_unconditional_jump (hsa_op_code_ref *reference)
1544 struct BrigInstBr repr;
1546 repr.base.base.byteCount = lendian16 (sizeof (repr));
1547 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1548 repr.base.opcode = lendian16 (BRIG_OPCODE_BR);
1549 repr.base.type = lendian16 (BRIG_TYPE_NONE);
1550 /* Direct branches to labels must be width(all). */
1551 repr.width = BRIG_WIDTH_ALL;
1553 repr.base.operands = lendian32 (emit_operands (reference));
1554 memset (&repr.reserved, 0, sizeof (repr.reserved));
1555 brig_code.add (&repr, sizeof (repr));
1556 brig_insn_count++;
1559 /* Emit an HSA switch jump instruction that uses a jump table to
1560 jump to a destination label. */
1562 static void
1563 emit_switch_insn (hsa_insn_sbr *sbr)
1565 struct BrigInstBr repr;
1567 gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR);
1568 repr.base.base.byteCount = lendian16 (sizeof (repr));
1569 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1570 repr.base.opcode = lendian16 (sbr->m_opcode);
1571 repr.width = BRIG_WIDTH_1;
1572 /* For Conditional jumps the type is always B1. */
1573 hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0));
1574 repr.base.type = lendian16 (index->m_type);
1575 repr.base.operands
1576 = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list));
1577 memset (&repr.reserved, 0, sizeof (repr.reserved));
1579 brig_code.add (&repr, sizeof (repr));
1580 brig_insn_count++;
1583 /* Emit a HSA convert instruction and all necessary directives, schedule
1584 necessary operands for writing. */
1586 static void
1587 emit_cvt_insn (hsa_insn_cvt *insn)
1589 struct BrigInstCvt repr;
1590 BrigType16_t srctype;
1592 repr.base.base.byteCount = lendian16 (sizeof (repr));
1593 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT);
1594 repr.base.opcode = lendian16 (insn->m_opcode);
1595 repr.base.type = lendian16 (insn->m_type);
1596 repr.base.operands = lendian32 (emit_insn_operands (insn));
1598 if (is_a <hsa_op_reg *> (insn->get_op (1)))
1599 srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type;
1600 else
1601 srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type;
1602 repr.sourceType = lendian16 (srctype);
1603 repr.modifier = 0;
1604 /* float to smaller float requires a rounding setting (we default
1605 to 'near'. */
1606 if (hsa_type_float_p (insn->m_type)
1607 && (!hsa_type_float_p (srctype)
1608 || ((insn->m_type & BRIG_TYPE_BASE_MASK)
1609 < (srctype & BRIG_TYPE_BASE_MASK))))
1610 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1611 else if (hsa_type_integer_p (insn->m_type) &&
1612 hsa_type_float_p (srctype))
1613 repr.round = BRIG_ROUND_INTEGER_ZERO;
1614 else
1615 repr.round = BRIG_ROUND_NONE;
1616 brig_code.add (&repr, sizeof (repr));
1617 brig_insn_count++;
1620 /* Emit call instruction INSN, where this instruction must be closed
1621 within a call block instruction. */
1623 static void
1624 emit_call_insn (hsa_insn_call *call)
1626 struct BrigInstBr repr;
1628 repr.base.base.byteCount = lendian16 (sizeof (repr));
1629 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1630 repr.base.opcode = lendian16 (BRIG_OPCODE_CALL);
1631 repr.base.type = lendian16 (BRIG_TYPE_NONE);
1633 repr.base.operands
1634 = lendian32 (emit_operands (call->m_result_code_list, &call->m_func,
1635 call->m_args_code_list));
1637 /* Internal functions have not set m_called_function. */
1638 if (call->m_called_function)
1640 function_linkage_pair pair (call->m_called_function,
1641 call->m_func.m_brig_op_offset);
1642 function_call_linkage.safe_push (pair);
1644 else
1646 hsa_internal_fn *slot
1647 = hsa_emitted_internal_decls->find (call->m_called_internal_fn);
1648 gcc_assert (slot);
1649 gcc_assert (slot->m_offset > 0);
1650 call->m_func.m_directive_offset = slot->m_offset;
1653 repr.width = BRIG_WIDTH_ALL;
1654 memset (&repr.reserved, 0, sizeof (repr.reserved));
1656 brig_code.add (&repr, sizeof (repr));
1657 brig_insn_count++;
1660 /* Emit argument block directive. */
1662 static void
1663 emit_arg_block_insn (hsa_insn_arg_block *insn)
1665 switch (insn->m_kind)
1667 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
1669 struct BrigDirectiveArgBlock repr;
1670 repr.base.byteCount = lendian16 (sizeof (repr));
1671 repr.base.kind = lendian16 (insn->m_kind);
1672 brig_code.add (&repr, sizeof (repr));
1674 for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++)
1676 insn->m_call_insn->m_args_code_list->m_offsets[i]
1677 = lendian32 (emit_directive_variable
1678 (insn->m_call_insn->m_input_args[i]));
1679 brig_insn_count++;
1682 if (insn->m_call_insn->m_output_arg)
1684 insn->m_call_insn->m_result_code_list->m_offsets[0]
1685 = lendian32 (emit_directive_variable
1686 (insn->m_call_insn->m_output_arg));
1687 brig_insn_count++;
1690 break;
1692 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
1694 struct BrigDirectiveArgBlock repr;
1695 repr.base.byteCount = lendian16 (sizeof (repr));
1696 repr.base.kind = lendian16 (insn->m_kind);
1697 brig_code.add (&repr, sizeof (repr));
1698 break;
1700 default:
1701 gcc_unreachable ();
1704 brig_insn_count++;
1707 /* Emit comment directive. */
1709 static void
1710 emit_comment_insn (hsa_insn_comment *insn)
1712 struct BrigDirectiveComment repr;
1713 memset (&repr, 0, sizeof (repr));
1715 repr.base.byteCount = lendian16 (sizeof (repr));
1716 repr.base.kind = lendian16 (insn->m_opcode);
1717 repr.name = brig_emit_string (insn->m_comment, '\0', false);
1718 brig_code.add (&repr, sizeof (repr));
1721 /* Emit queue instruction INSN. */
1723 static void
1724 emit_queue_insn (hsa_insn_queue *insn)
1726 BrigInstQueue repr;
1727 memset (&repr, 0, sizeof (repr));
1729 repr.base.base.byteCount = lendian16 (sizeof (repr));
1730 repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE);
1731 repr.base.opcode = lendian16 (insn->m_opcode);
1732 repr.base.type = lendian16 (insn->m_type);
1733 repr.segment = BRIG_SEGMENT_GLOBAL;
1734 repr.memoryOrder = BRIG_MEMORY_ORDER_SC_RELEASE;
1735 repr.base.operands = lendian32 (emit_insn_operands (insn));
1736 brig_data.round_size_up (4);
1737 brig_code.add (&repr, sizeof (repr));
1739 brig_insn_count++;
1742 /* Emit source type instruction INSN. */
1744 static void
1745 emit_srctype_insn (hsa_insn_srctype *insn)
1747 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1748 struct BrigInstSourceType repr;
1749 unsigned operand_count = insn->operand_count ();
1750 gcc_checking_assert (operand_count >= 2);
1752 memset (&repr, 0, sizeof (repr));
1753 repr.sourceType = lendian16 (insn->m_source_type);
1754 repr.base.base.byteCount = lendian16 (sizeof (repr));
1755 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1756 repr.base.opcode = lendian16 (insn->m_opcode);
1757 repr.base.type = lendian16 (insn->m_type);
1759 repr.base.operands = lendian32 (emit_insn_operands (insn));
1760 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1761 brig_insn_count++;
1764 /* Emit packed instruction INSN. */
1766 static void
1767 emit_packed_insn (hsa_insn_packed *insn)
1769 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1770 struct BrigInstSourceType repr;
1771 unsigned operand_count = insn->operand_count ();
1772 gcc_checking_assert (operand_count >= 2);
1774 memset (&repr, 0, sizeof (repr));
1775 repr.sourceType = lendian16 (insn->m_source_type);
1776 repr.base.base.byteCount = lendian16 (sizeof (repr));
1777 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1778 repr.base.opcode = lendian16 (insn->m_opcode);
1779 repr.base.type = lendian16 (insn->m_type);
1781 if (insn->m_opcode == BRIG_OPCODE_COMBINE)
1783 /* Create operand list for packed type. */
1784 for (unsigned i = 1; i < operand_count; i++)
1786 gcc_checking_assert (insn->get_op (i));
1787 insn->m_operand_list->m_offsets[i - 1]
1788 = lendian32 (enqueue_op (insn->get_op (i)));
1791 repr.base.operands = lendian32 (emit_operands (insn->get_op (0),
1792 insn->m_operand_list));
1794 else if (insn->m_opcode == BRIG_OPCODE_EXPAND)
1796 /* Create operand list for packed type. */
1797 for (unsigned i = 0; i < operand_count - 1; i++)
1799 gcc_checking_assert (insn->get_op (i));
1800 insn->m_operand_list->m_offsets[i]
1801 = lendian32 (enqueue_op (insn->get_op (i)));
1804 unsigned ops = emit_operands (insn->m_operand_list,
1805 insn->get_op (insn->operand_count () - 1));
1806 repr.base.operands = lendian32 (ops);
1810 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1811 brig_insn_count++;
1814 /* Emit a basic HSA instruction and all necessary directives, schedule
1815 necessary operands for writing. */
1817 static void
1818 emit_basic_insn (hsa_insn_basic *insn)
1820 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1821 struct BrigInstMod repr;
1822 BrigType16_t type;
1824 memset (&repr, 0, sizeof (repr));
1825 repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic));
1826 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC);
1827 repr.base.opcode = lendian16 (insn->m_opcode);
1828 switch (insn->m_opcode)
1830 /* And the bit-logical operations need bit types and whine about
1831 arithmetic types :-/ */
1832 case BRIG_OPCODE_AND:
1833 case BRIG_OPCODE_OR:
1834 case BRIG_OPCODE_XOR:
1835 case BRIG_OPCODE_NOT:
1836 type = regtype_for_type (insn->m_type);
1837 break;
1838 default:
1839 type = insn->m_type;
1840 break;
1842 repr.base.type = lendian16 (type);
1843 repr.base.operands = lendian32 (emit_insn_operands (insn));
1845 if (hsa_type_packed_p (type))
1847 if (hsa_type_float_p (type)
1848 && !hsa_opcode_floating_bit_insn_p (insn->m_opcode))
1849 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1850 else
1851 repr.round = 0;
1852 /* We assume that destination and sources agree in packing layout. */
1853 if (insn->num_used_ops () >= 2)
1854 repr.pack = BRIG_PACK_PP;
1855 else
1856 repr.pack = BRIG_PACK_P;
1857 repr.reserved = 0;
1858 repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod));
1859 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD);
1860 brig_code.add (&repr, sizeof (struct BrigInstMod));
1862 else
1863 brig_code.add (&repr, sizeof (struct BrigInstBasic));
1864 brig_insn_count++;
1867 /* Emit an HSA instruction and all necessary directives, schedule necessary
1868 operands for writing. */
1870 static void
1871 emit_insn (hsa_insn_basic *insn)
1873 gcc_assert (!is_a <hsa_insn_phi *> (insn));
1875 insn->m_brig_offset = brig_code.total_size;
1877 if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn))
1878 emit_signal_insn (signal);
1879 else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn))
1880 emit_atomic_insn (atom);
1881 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
1882 emit_memory_insn (mem);
1883 else if (insn->m_opcode == BRIG_OPCODE_LDA)
1884 emit_addr_insn (insn);
1885 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
1886 emit_segment_insn (seg);
1887 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
1888 emit_cmp_insn (cmp);
1889 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
1890 emit_branch_insn (br);
1891 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
1893 if (switch_instructions == NULL)
1894 switch_instructions = new vec <hsa_insn_sbr *> ();
1896 switch_instructions->safe_push (sbr);
1897 emit_switch_insn (sbr);
1899 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
1900 emit_arg_block_insn (block);
1901 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
1902 emit_call_insn (call);
1903 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
1904 emit_comment_insn (comment);
1905 else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn))
1906 emit_queue_insn (queue);
1907 else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn))
1908 emit_srctype_insn (srctype);
1909 else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn))
1910 emit_packed_insn (packed);
1911 else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn))
1912 emit_cvt_insn (cvt);
1913 else if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn))
1914 emit_alloca_insn (alloca);
1915 else
1916 emit_basic_insn (insn);
1919 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
1920 or we are about to finish emitting code, if it is NULL. If the fall through
1921 edge from BB does not lead to NEXT_BB, emit an unconditional jump. */
1923 static void
1924 perhaps_emit_branch (basic_block bb, basic_block next_bb)
1926 basic_block t_bb = NULL, ff = NULL;
1928 edge_iterator ei;
1929 edge e;
1931 /* If the last instruction of BB is a switch, ignore emission of all
1932 edges. */
1933 if (hsa_bb_for_bb (bb)->m_last_insn
1934 && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn))
1935 return;
1937 FOR_EACH_EDGE (e, ei, bb->succs)
1938 if (e->flags & EDGE_TRUE_VALUE)
1940 gcc_assert (!t_bb);
1941 t_bb = e->dest;
1943 else
1945 gcc_assert (!ff);
1946 ff = e->dest;
1949 if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun))
1950 return;
1952 emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref);
1955 /* Emit the a function with name NAME to the various brig sections. */
1957 void
1958 hsa_brig_emit_function (void)
1960 basic_block bb, prev_bb;
1961 hsa_insn_basic *insn;
1962 BrigDirectiveExecutable *ptr_to_fndir;
1964 brig_init ();
1966 brig_insn_count = 0;
1967 memset (&op_queue, 0, sizeof (op_queue));
1968 op_queue.projected_size = brig_operand.total_size;
1970 if (!function_offsets)
1971 function_offsets = new hash_map<tree, BrigCodeOffset32_t> ();
1973 if (!emitted_declarations)
1974 emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> ();
1976 for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++)
1978 tree called = hsa_cfun->m_called_functions[i];
1980 /* If the function has no definition, emit a declaration. */
1981 if (!emitted_declarations->get (called))
1983 BrigDirectiveExecutable *e = emit_function_declaration (called);
1984 emitted_declarations->put (called, e);
1988 for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++)
1990 hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i];
1991 emit_internal_fn_decl (called);
1994 ptr_to_fndir = emit_function_directives (hsa_cfun, false);
1995 for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn;
1996 insn;
1997 insn = insn->m_next)
1998 emit_insn (insn);
1999 prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2000 FOR_EACH_BB_FN (bb, cfun)
2002 perhaps_emit_branch (prev_bb, bb);
2003 emit_bb_label_directive (hsa_bb_for_bb (bb));
2004 for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next)
2005 emit_insn (insn);
2006 prev_bb = bb;
2008 perhaps_emit_branch (prev_bb, NULL);
2009 ptr_to_fndir->nextModuleEntry = brig_code.total_size;
2011 /* Fill up label references for all sbr instructions. */
2012 if (switch_instructions)
2014 for (unsigned i = 0; i < switch_instructions->length (); i++)
2016 hsa_insn_sbr *sbr = (*switch_instructions)[i];
2017 for (unsigned j = 0; j < sbr->m_jump_table.length (); j++)
2019 hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]);
2020 sbr->m_label_code_list->m_offsets[j]
2021 = hbb->m_label_ref.m_directive_offset;
2025 switch_instructions->release ();
2026 delete switch_instructions;
2027 switch_instructions = NULL;
2030 if (dump_file)
2032 fprintf (dump_file, "------- After BRIG emission: -------\n");
2033 dump_hsa_cfun (dump_file);
2036 emit_queued_operands ();
2039 /* Emit all OMP symbols related to OMP. */
2041 void
2042 hsa_brig_emit_omp_symbols (void)
2044 brig_init ();
2045 emit_directive_variable (hsa_num_threads);
2048 /* Create and return __hsa_global_variables symbol that contains
2049 all informations consumed by libgomp to link global variables
2050 with their string names used by an HSA kernel. */
2052 static tree
2053 hsa_output_global_variables ()
2055 unsigned l = hsa_global_variable_symbols->elements ();
2057 tree variable_info_type = make_node (RECORD_TYPE);
2058 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2059 get_identifier ("name"), ptr_type_node);
2060 DECL_CHAIN (id_f1) = NULL_TREE;
2061 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2062 get_identifier ("omp_data_size"),
2063 ptr_type_node);
2064 DECL_CHAIN (id_f2) = id_f1;
2065 finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2,
2066 NULL_TREE);
2068 tree int_num_of_global_vars;
2069 int_num_of_global_vars = build_int_cst (uint32_type_node, l);
2070 tree global_vars_num_index_type = build_index_type (int_num_of_global_vars);
2071 tree global_vars_array_type = build_array_type (variable_info_type,
2072 global_vars_num_index_type);
2073 TYPE_ARTIFICIAL (global_vars_array_type) = 1;
2075 vec<constructor_elt, va_gc> *global_vars_vec = NULL;
2077 for (hash_table <hsa_noop_symbol_hasher>::iterator it
2078 = hsa_global_variable_symbols->begin ();
2079 it != hsa_global_variable_symbols->end (); ++it)
2081 unsigned len = strlen ((*it)->m_name);
2082 char *copy = XNEWVEC (char, len + 2);
2083 copy[0] = '&';
2084 memcpy (copy + 1, (*it)->m_name, len);
2085 copy[len + 1] = '\0';
2086 len++;
2087 hsa_sanitize_name (copy);
2089 tree var_name = build_string (len, copy);
2090 TREE_TYPE (var_name)
2091 = build_array_type (char_type_node, build_index_type (size_int (len)));
2092 free (copy);
2094 vec<constructor_elt, va_gc> *variable_info_vec = NULL;
2095 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2096 build1 (ADDR_EXPR,
2097 build_pointer_type (TREE_TYPE (var_name)),
2098 var_name));
2099 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2100 build_fold_addr_expr ((*it)->m_decl));
2102 tree variable_info_ctor = build_constructor (variable_info_type,
2103 variable_info_vec);
2105 CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE,
2106 variable_info_ctor);
2109 tree global_vars_ctor = build_constructor (global_vars_array_type,
2110 global_vars_vec);
2112 char tmp_name[64];
2113 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1);
2114 tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2115 get_identifier (tmp_name),
2116 global_vars_array_type);
2117 TREE_STATIC (global_vars_table) = 1;
2118 TREE_READONLY (global_vars_table) = 1;
2119 TREE_PUBLIC (global_vars_table) = 0;
2120 DECL_ARTIFICIAL (global_vars_table) = 1;
2121 DECL_IGNORED_P (global_vars_table) = 1;
2122 DECL_EXTERNAL (global_vars_table) = 0;
2123 TREE_CONSTANT (global_vars_table) = 1;
2124 DECL_INITIAL (global_vars_table) = global_vars_ctor;
2125 varpool_node::finalize_decl (global_vars_table);
2127 return global_vars_table;
2130 /* Create __hsa_host_functions and __hsa_kernels that contain
2131 all informations consumed by libgomp to register all kernels
2132 in the BRIG binary. */
2134 static void
2135 hsa_output_kernels (tree *host_func_table, tree *kernels)
2137 unsigned map_count = hsa_get_number_decl_kernel_mappings ();
2139 tree int_num_of_kernels;
2140 int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
2141 tree kernel_num_index_type = build_index_type (int_num_of_kernels);
2142 tree host_functions_array_type = build_array_type (ptr_type_node,
2143 kernel_num_index_type);
2144 TYPE_ARTIFICIAL (host_functions_array_type) = 1;
2146 vec<constructor_elt, va_gc> *host_functions_vec = NULL;
2147 for (unsigned i = 0; i < map_count; ++i)
2149 tree decl = hsa_get_decl_kernel_mapping_decl (i);
2150 tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl));
2151 CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
2153 tree host_functions_ctor = build_constructor (host_functions_array_type,
2154 host_functions_vec);
2155 char tmp_name[64];
2156 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
2157 tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2158 get_identifier (tmp_name),
2159 host_functions_array_type);
2160 TREE_STATIC (hsa_host_func_table) = 1;
2161 TREE_READONLY (hsa_host_func_table) = 1;
2162 TREE_PUBLIC (hsa_host_func_table) = 0;
2163 DECL_ARTIFICIAL (hsa_host_func_table) = 1;
2164 DECL_IGNORED_P (hsa_host_func_table) = 1;
2165 DECL_EXTERNAL (hsa_host_func_table) = 0;
2166 TREE_CONSTANT (hsa_host_func_table) = 1;
2167 DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
2168 varpool_node::finalize_decl (hsa_host_func_table);
2169 *host_func_table = hsa_host_func_table;
2171 /* Following code emits list of kernel_info structures. */
2173 tree kernel_info_type = make_node (RECORD_TYPE);
2174 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2175 get_identifier ("name"), ptr_type_node);
2176 DECL_CHAIN (id_f1) = NULL_TREE;
2177 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2178 get_identifier ("omp_data_size"),
2179 unsigned_type_node);
2180 DECL_CHAIN (id_f2) = id_f1;
2181 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2182 get_identifier ("gridified_kernel_p"),
2183 boolean_type_node);
2184 DECL_CHAIN (id_f3) = id_f2;
2185 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2186 get_identifier ("kernel_dependencies_count"),
2187 unsigned_type_node);
2188 DECL_CHAIN (id_f4) = id_f3;
2189 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2190 get_identifier ("kernel_dependencies"),
2191 build_pointer_type (build_pointer_type
2192 (char_type_node)));
2193 DECL_CHAIN (id_f5) = id_f4;
2194 finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5,
2195 NULL_TREE);
2197 int_num_of_kernels = build_int_cstu (uint32_type_node, map_count);
2198 tree kernel_info_vector_type
2199 = build_array_type (kernel_info_type,
2200 build_index_type (int_num_of_kernels));
2201 TYPE_ARTIFICIAL (kernel_info_vector_type) = 1;
2203 vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL;
2204 tree kernel_dependencies_vector_type = NULL;
2206 for (unsigned i = 0; i < map_count; ++i)
2208 tree kernel = hsa_get_decl_kernel_mapping_decl (i);
2209 char *name = hsa_get_decl_kernel_mapping_name (i);
2210 unsigned len = strlen (name);
2211 char *copy = XNEWVEC (char, len + 2);
2212 copy[0] = '&';
2213 memcpy (copy + 1, name, len);
2214 copy[len + 1] = '\0';
2215 len++;
2217 tree kern_name = build_string (len, copy);
2218 TREE_TYPE (kern_name)
2219 = build_array_type (char_type_node, build_index_type (size_int (len)));
2220 free (copy);
2222 unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i);
2223 tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size);
2224 bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i);
2225 tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
2226 gridified_kernel_p);
2227 unsigned count = 0;
2229 kernel_dependencies_vector_type
2230 = build_array_type (build_pointer_type (char_type_node),
2231 build_index_type (size_int (0)));
2233 vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
2234 if (hsa_decl_kernel_dependencies)
2236 vec<const char *> **slot;
2237 slot = hsa_decl_kernel_dependencies->get (kernel);
2238 if (slot)
2240 vec <const char *> *dependencies = *slot;
2241 count = dependencies->length ();
2243 kernel_dependencies_vector_type
2244 = build_array_type (build_pointer_type (char_type_node),
2245 build_index_type (size_int (count)));
2246 TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1;
2248 for (unsigned j = 0; j < count; j++)
2250 const char *d = (*dependencies)[j];
2251 len = strlen (d);
2252 tree dependency_name = build_string (len, d);
2253 TREE_TYPE (dependency_name)
2254 = build_array_type (char_type_node,
2255 build_index_type (size_int (len)));
2257 CONSTRUCTOR_APPEND_ELT
2258 (kernel_dependencies_vec, NULL_TREE,
2259 build1 (ADDR_EXPR,
2260 build_pointer_type (TREE_TYPE (dependency_name)),
2261 dependency_name));
2266 tree dependencies_count = build_int_cstu (unsigned_type_node, count);
2268 vec<constructor_elt, va_gc> *kernel_info_vec = NULL;
2269 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2270 build1 (ADDR_EXPR,
2271 build_pointer_type (TREE_TYPE
2272 (kern_name)),
2273 kern_name));
2274 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size);
2275 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2276 gridified_kernel_p_tree);
2277 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count);
2279 if (count > 0)
2281 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
2282 tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2283 get_identifier (tmp_name),
2284 kernel_dependencies_vector_type);
2286 TREE_STATIC (dependencies_list) = 1;
2287 TREE_READONLY (dependencies_list) = 1;
2288 TREE_PUBLIC (dependencies_list) = 0;
2289 DECL_ARTIFICIAL (dependencies_list) = 1;
2290 DECL_IGNORED_P (dependencies_list) = 1;
2291 DECL_EXTERNAL (dependencies_list) = 0;
2292 TREE_CONSTANT (dependencies_list) = 1;
2293 DECL_INITIAL (dependencies_list)
2294 = build_constructor (kernel_dependencies_vector_type,
2295 kernel_dependencies_vec);
2296 varpool_node::finalize_decl (dependencies_list);
2298 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2299 build1 (ADDR_EXPR,
2300 build_pointer_type
2301 (TREE_TYPE (dependencies_list)),
2302 dependencies_list));
2304 else
2305 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node);
2307 tree kernel_info_ctor = build_constructor (kernel_info_type,
2308 kernel_info_vec);
2310 CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE,
2311 kernel_info_ctor);
2314 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1);
2315 tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2316 get_identifier (tmp_name),
2317 kernel_info_vector_type);
2319 TREE_STATIC (hsa_kernels) = 1;
2320 TREE_READONLY (hsa_kernels) = 1;
2321 TREE_PUBLIC (hsa_kernels) = 0;
2322 DECL_ARTIFICIAL (hsa_kernels) = 1;
2323 DECL_IGNORED_P (hsa_kernels) = 1;
2324 DECL_EXTERNAL (hsa_kernels) = 0;
2325 TREE_CONSTANT (hsa_kernels) = 1;
2326 DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type,
2327 kernel_info_vector_vec);
2328 varpool_node::finalize_decl (hsa_kernels);
2329 *kernels = hsa_kernels;
2332 /* Create a static constructor that will register out brig stuff with
2333 libgomp. */
2335 static void
2336 hsa_output_libgomp_mapping (tree brig_decl)
2338 unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
2339 unsigned global_variable_count = hsa_global_variable_symbols->elements ();
2341 tree kernels;
2342 tree host_func_table;
2344 hsa_output_kernels (&host_func_table, &kernels);
2345 tree global_vars = hsa_output_global_variables ();
2347 tree hsa_image_desc_type = make_node (RECORD_TYPE);
2348 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2349 get_identifier ("brig_module"), ptr_type_node);
2350 DECL_CHAIN (id_f1) = NULL_TREE;
2351 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2352 get_identifier ("kernel_count"),
2353 unsigned_type_node);
2355 DECL_CHAIN (id_f2) = id_f1;
2356 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2357 get_identifier ("hsa_kernel_infos"),
2358 ptr_type_node);
2359 DECL_CHAIN (id_f3) = id_f2;
2360 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2361 get_identifier ("global_variable_count"),
2362 unsigned_type_node);
2363 DECL_CHAIN (id_f4) = id_f3;
2364 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2365 get_identifier ("hsa_global_variable_infos"),
2366 ptr_type_node);
2367 DECL_CHAIN (id_f5) = id_f4;
2368 finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5,
2369 NULL_TREE);
2370 TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;
2372 vec<constructor_elt, va_gc> *img_desc_vec = NULL;
2373 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2374 build_fold_addr_expr (brig_decl));
2375 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2376 build_int_cstu (unsigned_type_node, kernel_count));
2377 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2378 build1 (ADDR_EXPR,
2379 build_pointer_type (TREE_TYPE (kernels)),
2380 kernels));
2381 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2382 build_int_cstu (unsigned_type_node,
2383 global_variable_count));
2384 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2385 build1 (ADDR_EXPR,
2386 build_pointer_type (TREE_TYPE (global_vars)),
2387 global_vars));
2389 tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
2391 char tmp_name[64];
2392 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1);
2393 tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2394 get_identifier (tmp_name),
2395 hsa_image_desc_type);
2396 TREE_STATIC (hsa_img_descriptor) = 1;
2397 TREE_READONLY (hsa_img_descriptor) = 1;
2398 TREE_PUBLIC (hsa_img_descriptor) = 0;
2399 DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
2400 DECL_IGNORED_P (hsa_img_descriptor) = 1;
2401 DECL_EXTERNAL (hsa_img_descriptor) = 0;
2402 TREE_CONSTANT (hsa_img_descriptor) = 1;
2403 DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
2404 varpool_node::finalize_decl (hsa_img_descriptor);
2406 /* Construct the "host_table" libgomp expects. */
2407 tree index_type = build_index_type (build_int_cst (integer_type_node, 4));
2408 tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type);
2409 TYPE_ARTIFICIAL (libgomp_host_table_type) = 1;
2410 vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
2411 tree host_func_table_addr = build_fold_addr_expr (host_func_table);
2412 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2413 host_func_table_addr);
2414 offset_int func_table_size
2415 = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
2416 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2417 fold_build2 (POINTER_PLUS_EXPR,
2418 TREE_TYPE (host_func_table_addr),
2419 host_func_table_addr,
2420 build_int_cst (size_type_node,
2421 func_table_size.to_uhwi
2422 ())));
2423 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2424 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2425 tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
2426 libgomp_host_table_vec);
2427 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1);
2428 tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2429 get_identifier (tmp_name),
2430 libgomp_host_table_type);
2432 TREE_STATIC (hsa_libgomp_host_table) = 1;
2433 TREE_READONLY (hsa_libgomp_host_table) = 1;
2434 TREE_PUBLIC (hsa_libgomp_host_table) = 0;
2435 DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
2436 DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
2437 DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
2438 TREE_CONSTANT (hsa_libgomp_host_table) = 1;
2439 DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
2440 varpool_node::finalize_decl (hsa_libgomp_host_table);
2442 /* Generate an initializer with a call to the registration routine. */
2444 tree offload_register
2445 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER);
2446 gcc_checking_assert (offload_register);
2448 tree *hsa_ctor_stmts = hsa_get_ctor_statements ();
2449 append_to_statement_list
2450 (build_call_expr (offload_register, 4,
2451 build_int_cstu (unsigned_type_node,
2452 GOMP_VERSION_PACK (GOMP_VERSION,
2453 GOMP_VERSION_HSA)),
2454 build_fold_addr_expr (hsa_libgomp_host_table),
2455 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2456 build_fold_addr_expr (hsa_img_descriptor)),
2457 hsa_ctor_stmts);
2459 cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY);
2461 tree offload_unregister
2462 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER);
2463 gcc_checking_assert (offload_unregister);
2465 tree *hsa_dtor_stmts = hsa_get_dtor_statements ();
2466 append_to_statement_list
2467 (build_call_expr (offload_unregister, 4,
2468 build_int_cstu (unsigned_type_node,
2469 GOMP_VERSION_PACK (GOMP_VERSION,
2470 GOMP_VERSION_HSA)),
2471 build_fold_addr_expr (hsa_libgomp_host_table),
2472 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2473 build_fold_addr_expr (hsa_img_descriptor)),
2474 hsa_dtor_stmts);
2475 cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, DEFAULT_INIT_PRIORITY);
2478 /* Emit the brig module we have compiled to a section in the final assembly and
2479 also create a compile unit static constructor that will register the brig
2480 module with libgomp. */
2482 void
2483 hsa_output_brig (void)
2485 section *saved_section;
2487 if (!brig_initialized)
2488 return;
2490 for (unsigned i = 0; i < function_call_linkage.length (); i++)
2492 function_linkage_pair p = function_call_linkage[i];
2494 BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl);
2495 gcc_assert (*func_offset);
2496 BrigOperandCodeRef *code_ref
2497 = (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset));
2498 gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF);
2499 code_ref->ref = lendian32 (*func_offset);
2502 /* Iterate all function declarations and if we meet a function that should
2503 have module linkage and we are unable to emit HSAIL for the function,
2504 then change the linkage to program linkage. Doing so, we will emit
2505 a valid BRIG image. */
2506 if (hsa_failed_functions != NULL && emitted_declarations != NULL)
2507 for (hash_map <tree, BrigDirectiveExecutable *>::iterator it
2508 = emitted_declarations->begin ();
2509 it != emitted_declarations->end ();
2510 ++it)
2512 if (hsa_failed_functions->contains ((*it).first))
2513 (*it).second->linkage = BRIG_LINKAGE_PROGRAM;
2516 saved_section = in_section;
2518 switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
2519 char tmp_name[64];
2520 ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
2521 ASM_OUTPUT_LABEL (asm_out_file, tmp_name);
2522 tree brig_id = get_identifier (tmp_name);
2523 tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
2524 char_type_node);
2525 SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
2526 TREE_ADDRESSABLE (brig_decl) = 1;
2527 TREE_READONLY (brig_decl) = 1;
2528 DECL_ARTIFICIAL (brig_decl) = 1;
2529 DECL_IGNORED_P (brig_decl) = 1;
2530 TREE_STATIC (brig_decl) = 1;
2531 TREE_PUBLIC (brig_decl) = 0;
2532 TREE_USED (brig_decl) = 1;
2533 DECL_INITIAL (brig_decl) = brig_decl;
2534 TREE_ASM_WRITTEN (brig_decl) = 1;
2536 BrigModuleHeader module_header;
2537 memcpy (&module_header.identification, "HSA BRIG",
2538 sizeof (module_header.identification));
2539 module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR);
2540 module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR);
2541 uint64_t section_index[3];
2543 int data_padding, code_padding, operand_padding;
2544 data_padding = HSA_SECTION_ALIGNMENT
2545 - brig_data.total_size % HSA_SECTION_ALIGNMENT;
2546 code_padding = HSA_SECTION_ALIGNMENT
2547 - brig_code.total_size % HSA_SECTION_ALIGNMENT;
2548 operand_padding = HSA_SECTION_ALIGNMENT
2549 - brig_operand.total_size % HSA_SECTION_ALIGNMENT;
2551 uint64_t module_size = sizeof (module_header)
2552 + sizeof (section_index)
2553 + brig_data.total_size
2554 + data_padding
2555 + brig_code.total_size
2556 + code_padding
2557 + brig_operand.total_size
2558 + operand_padding;
2559 gcc_assert ((module_size % 16) == 0);
2560 module_header.byteCount = lendian64 (module_size);
2561 memset (&module_header.hash, 0, sizeof (module_header.hash));
2562 module_header.reserved = 0;
2563 module_header.sectionCount = lendian32 (3);
2564 module_header.sectionIndex = lendian64 (sizeof (module_header));
2565 assemble_string ((const char *) &module_header, sizeof (module_header));
2566 uint64_t off = sizeof (module_header) + sizeof (section_index);
2567 section_index[0] = lendian64 (off);
2568 off += brig_data.total_size + data_padding;
2569 section_index[1] = lendian64 (off);
2570 off += brig_code.total_size + code_padding;
2571 section_index[2] = lendian64 (off);
2572 assemble_string ((const char *) &section_index, sizeof (section_index));
2574 char padding[HSA_SECTION_ALIGNMENT];
2575 memset (padding, 0, sizeof (padding));
2577 brig_data.output ();
2578 assemble_string (padding, data_padding);
2579 brig_code.output ();
2580 assemble_string (padding, code_padding);
2581 brig_operand.output ();
2582 assemble_string (padding, operand_padding);
2584 if (saved_section)
2585 switch_to_section (saved_section);
2587 hsa_output_libgomp_mapping (brig_decl);
2589 hsa_free_decl_kernel_mapping ();
2590 brig_release_data ();
2591 hsa_deinit_compilation_unit_data ();
2593 delete emitted_declarations;
2594 emitted_declarations = NULL;
2595 delete function_offsets;
2596 function_offsets = NULL;