* gimple-ssa-store-merging.c (struct store_immediate_info): Add
[official-gcc.git] / gcc / hsa-brig.c
blobd15ce261ed2c543cb56fddae029a55eab69ed4d8
1 /* Producing binary form of HSA BRIG from our internal representation.
2 Copyright (C) 2013-2017 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "target.h"
27 #include "memmodel.h"
28 #include "tm_p.h"
29 #include "is-a.h"
30 #include "vec.h"
31 #include "hash-table.h"
32 #include "hash-map.h"
33 #include "tree.h"
34 #include "tree-iterator.h"
35 #include "stor-layout.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "cfg.h"
39 #include "function.h"
40 #include "fold-const.h"
41 #include "stringpool.h"
42 #include "gimple-pretty-print.h"
43 #include "diagnostic-core.h"
44 #include "cgraph.h"
45 #include "dumpfile.h"
46 #include "print-tree.h"
47 #include "symbol-summary.h"
48 #include "hsa-common.h"
49 #include "gomp-constants.h"
/* Return the 16-bit value VAL in little-endian byte order, swapping its
   two bytes only when the host requires it.  */

static uint16_t
lendian16 (uint16_t val)
{
#if GCC_VERSION >= 4008
  /* The host compiler offers byte-order macros and a 16-bit swap builtin.  */
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap16 (val);
# else
  /* Little-endian and PDP-endian hosts: VAL is returned unchanged.  */
  return val;
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Slower but safe default: swap the bytes with shifts.  */
  return (val >> 8) | (val << 8);
#else
  return val;
#endif
}
/* Return the 32-bit value VAL in little-endian byte order, reordering its
   bytes only when the host requires it.  */

static uint32_t
lendian32 (uint32_t val)
{
#if GCC_VERSION >= 4006
  /* The host compiler offers byte-order macros and a 32-bit swap builtin.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap32 (val);
# else /* __ORDER_PDP_ENDIAN__ */
  /* Only the two 16-bit halves have to be exchanged.  */
  return (val << 16) | (val >> 16);
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Slower but safe default: swap the bytes inside each half, then swap
     the halves.  */
  uint32_t swapped = ((val >> 8) & 0x00ff00ff) | ((val & 0x00ff00ff) << 8);
  return (swapped << 16) | (swapped >> 16);
#else
  return val;
#endif
}
/* Return the 64-bit value VAL in little-endian byte order, reordering its
   bytes only when the host requires it.  */

static uint64_t
lendian64 (uint64_t val)
{
#if GCC_VERSION >= 4006
  /* The host compiler offers byte-order macros and a 64-bit swap builtin.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap64 (val);
# else /* __ORDER_PDP_ENDIAN__ */
  /* Reverse the order of the four 16-bit words.  */
  return (((val & 0xffffull) << 48)
          | ((val & 0xffff0000ull) << 16)
          | ((val & 0xffff00000000ull) >> 16)
          | ((val & 0xffff000000000000ull) >> 48));
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Slower but safe default: swap adjacent bytes, then adjacent 16-bit
     words, then the two 32-bit halves.  */
  val = (((val >> 8) & 0x00ff00ff00ff00ffull)
         | ((val & 0x00ff00ff00ff00ffull) << 8));
  val = (((val >> 16) & 0x0000ffff0000ffffull)
         | ((val & 0x0000ffff0000ffffull) << 16));
  return (val >> 32) | (val << 32);
#else
  return val;
#endif
}
/* Names used when producing the BRIG output.  */
#define BRIG_ELF_SECTION_NAME ".brig"
#define BRIG_LABEL_STRING "hsa_brig"
#define BRIG_SECTION_DATA_NAME "hsa_data"
#define BRIG_SECTION_CODE_NAME "hsa_code"
#define BRIG_SECTION_OPERAND_NAME "hsa_operand"

/* Capacity in bytes of each buffer a BRIG section allocates for its data.  */
#define BRIG_CHUNK_MAX_SIZE (64 * 1024)

/* Alignment that HSA sections are required to have.  */
#define HSA_SECTION_ALIGNMENT 16
/* One buffer holding a piece of BRIG binary data.  */

struct hsa_brig_data_chunk
{
  /* Number of bytes of DATA that are already in use.  */
  unsigned size;

  /* Start of the buffer.  */
  char *data;
};
151 /* Structure representing a BRIG section, holding and writing its data. */
153 class hsa_brig_section
155 public:
156 /* Section name that will be output to the BRIG. */
157 const char *section_name;
158 /* Size in bytes of all data stored in the section. */
159 unsigned total_size;
160 /* The size of the header of the section including padding. */
161 unsigned header_byte_count;
162 /* The size of the header of the section without any padding. */
163 unsigned header_byte_delta;
165 void init (const char *name);
166 void release ();
167 void output ();
168 unsigned add (const void *data, unsigned len, void **output = NULL);
169 void round_size_up (int factor);
170 void *get_ptr_by_offset (unsigned int offset);
172 private:
173 void allocate_new_chunk ();
175 /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */
176 vec <struct hsa_brig_data_chunk> chunks;
178 /* More convenient access to the last chunk from the vector above. */
179 struct hsa_brig_data_chunk *cur_chunk;
182 static struct hsa_brig_section brig_data, brig_code, brig_operand;
183 static uint32_t brig_insn_count;
184 static bool brig_initialized = false;
186 /* Mapping between emitted HSA functions and their offset in code segment. */
187 static hash_map<tree, BrigCodeOffset32_t> *function_offsets;
189 /* Hash map of emitted function declarations. */
190 static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations;
192 /* Hash table of emitted internal function declaration offsets. */
193 hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls;
195 /* List of sbr instructions. */
196 static vec <hsa_insn_sbr *> *switch_instructions;
198 struct function_linkage_pair
200 function_linkage_pair (tree decl, unsigned int off)
201 : function_decl (decl), offset (off) {}
203 /* Declaration of called function. */
204 tree function_decl;
206 /* Offset in operand section. */
207 unsigned int offset;
210 /* Vector of function calls where we need to resolve function offsets. */
211 static auto_vec <function_linkage_pair> function_call_linkage;
213 /* Add a new chunk, allocate data for it and initialize it. */
215 void
216 hsa_brig_section::allocate_new_chunk ()
218 struct hsa_brig_data_chunk new_chunk;
220 new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE);
221 new_chunk.size = 0;
222 cur_chunk = chunks.safe_push (new_chunk);
225 /* Initialize the brig section. */
227 void
228 hsa_brig_section::init (const char *name)
230 section_name = name;
231 /* While the following computation is basically wrong, because the intent
232 certainly wasn't to have the first character of name and padding, which
233 are a part of sizeof (BrigSectionHeader), included in the first addend,
234 this is what the disassembler expects. */
235 total_size = sizeof (BrigSectionHeader) + strlen (section_name);
236 chunks.create (1);
237 allocate_new_chunk ();
238 header_byte_delta = total_size;
239 round_size_up (4);
240 header_byte_count = total_size;
243 /* Free all data in the section. */
245 void
246 hsa_brig_section::release ()
248 for (unsigned i = 0; i < chunks.length (); i++)
249 free (chunks[i].data);
250 chunks.release ();
251 cur_chunk = NULL;
254 /* Write the section to the output file to a section with the name given at
255 initialization. Switches the output section and does not restore it. */
257 void
258 hsa_brig_section::output ()
260 struct BrigSectionHeader section_header;
261 char padding[8];
263 section_header.byteCount = lendian64 (total_size);
264 section_header.headerByteCount = lendian32 (header_byte_count);
265 section_header.nameLength = lendian32 (strlen (section_name));
266 assemble_string ((const char *) &section_header, 16);
267 assemble_string (section_name, (section_header.nameLength));
268 memset (&padding, 0, sizeof (padding));
269 /* This is also a consequence of the wrong header size computation described
270 in a comment in hsa_brig_section::init. */
271 assemble_string (padding, 8);
272 for (unsigned i = 0; i < chunks.length (); i++)
273 assemble_string (chunks[i].data, chunks[i].size);
276 /* Add to the stream LEN bytes of opaque binary DATA. Return the offset at
277 which it was stored. If OUTPUT is not NULL, store into it the pointer to
278 the place where DATA was actually stored. */
280 unsigned
281 hsa_brig_section::add (const void *data, unsigned len, void **output)
283 unsigned offset = total_size;
285 gcc_assert (len <= BRIG_CHUNK_MAX_SIZE);
286 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len))
287 allocate_new_chunk ();
289 char *dst = cur_chunk->data + cur_chunk->size;
290 memcpy (dst, data, len);
291 if (output)
292 *output = dst;
293 cur_chunk->size += len;
294 total_size += len;
296 return offset;
299 /* Add padding to section so that its size is divisible by FACTOR. */
301 void
302 hsa_brig_section::round_size_up (int factor)
304 unsigned padding, res = total_size % factor;
306 if (res == 0)
307 return;
309 padding = factor - res;
310 total_size += padding;
311 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding))
313 padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size;
314 cur_chunk->size = BRIG_CHUNK_MAX_SIZE;
315 allocate_new_chunk ();
318 cur_chunk->size += padding;
321 /* Return pointer to data by global OFFSET in the section. */
323 void *
324 hsa_brig_section::get_ptr_by_offset (unsigned int offset)
326 gcc_assert (offset < total_size);
327 offset -= header_byte_delta;
329 unsigned i;
330 for (i = 0; offset >= chunks[i].size; i++)
331 offset -= chunks[i].size;
333 return chunks[i].data + offset;
/* Entry of the hash table of strings emitted to BRIG.  */

struct brig_string_slot
{
  /* The characters of the string; LEN of them are significant.  */
  const char *s;
  /* Prefix character stored alongside the string, zero for none.  */
  char prefix;
  /* Length of S.  */
  int len;
  /* Offset at which the string was emitted.  */
  uint32_t offset;
};
346 /* Hash table helpers. */
348 struct brig_string_slot_hasher : pointer_hash <brig_string_slot>
350 static inline hashval_t hash (const value_type);
351 static inline bool equal (const value_type, const compare_type);
352 static inline void remove (value_type);
355 /* Returns a hash code for DS. Adapted from libiberty's htab_hash_string
356 to support strings that may not end in '\0'. */
358 inline hashval_t
359 brig_string_slot_hasher::hash (const value_type ds)
361 hashval_t r = ds->len;
362 int i;
364 for (i = 0; i < ds->len; i++)
365 r = r * 67 + (unsigned) ds->s[i] - 113;
366 r = r * 67 + (unsigned) ds->prefix - 113;
367 return r;
370 /* Returns nonzero if DS1 and DS2 are equal. */
372 inline bool
373 brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2)
375 if (ds1->len == ds2->len)
376 return ds1->prefix == ds2->prefix
377 && memcmp (ds1->s, ds2->s, ds1->len) == 0;
379 return 0;
382 /* Deallocate memory for DS upon its removal. */
384 inline void
385 brig_string_slot_hasher::remove (value_type ds)
387 free (const_cast<char *> (ds->s));
388 free (ds);
391 /* Hash for strings we output in order not to duplicate them needlessly. */
393 static hash_table<brig_string_slot_hasher> *brig_string_htab;
395 /* Emit a null terminated string STR to the data section and return its
396 offset in it. If PREFIX is non-zero, output it just before STR too.
397 Sanitize the string if SANITIZE option is set to true. */
399 static unsigned
400 brig_emit_string (const char *str, char prefix = 0, bool sanitize = true)
402 unsigned slen = strlen (str);
403 unsigned offset, len = slen + (prefix ? 1 : 0);
404 uint32_t hdr_len = lendian32 (len);
405 brig_string_slot s_slot;
406 brig_string_slot **slot;
407 char *str2;
409 str2 = xstrdup (str);
411 if (sanitize)
412 hsa_sanitize_name (str2);
413 s_slot.s = str2;
414 s_slot.len = slen;
415 s_slot.prefix = prefix;
416 s_slot.offset = 0;
418 slot = brig_string_htab->find_slot (&s_slot, INSERT);
419 if (*slot == NULL)
421 brig_string_slot *new_slot = XCNEW (brig_string_slot);
423 /* In theory we should fill in BrigData but that would mean copying
424 the string to a buffer for no reason, so we just emulate it. */
425 offset = brig_data.add (&hdr_len, sizeof (hdr_len));
426 if (prefix)
427 brig_data.add (&prefix, 1);
429 brig_data.add (str2, slen);
430 brig_data.round_size_up (4);
432 /* TODO: could use the string we just copied into
433 brig_string->cur_chunk */
434 new_slot->s = str2;
435 new_slot->len = slen;
436 new_slot->prefix = prefix;
437 new_slot->offset = offset;
438 *slot = new_slot;
440 else
442 offset = (*slot)->offset;
443 free (str2);
446 return offset;
449 /* Linked list of queued operands. */
451 static struct operand_queue
453 /* First from the chain of queued operands. */
454 hsa_op_base *first_op, *last_op;
456 /* The offset at which the next operand will be enqueued. */
457 unsigned projected_size;
459 } op_queue;
461 /* Unless already initialized, initialize infrastructure to produce BRIG. */
463 static void
464 brig_init (void)
466 brig_insn_count = 0;
468 if (brig_initialized)
469 return;
471 brig_string_htab = new hash_table<brig_string_slot_hasher> (37);
472 brig_data.init (BRIG_SECTION_DATA_NAME);
473 brig_code.init (BRIG_SECTION_CODE_NAME);
474 brig_operand.init (BRIG_SECTION_OPERAND_NAME);
475 brig_initialized = true;
477 struct BrigDirectiveModule moddir;
478 memset (&moddir, 0, sizeof (moddir));
479 moddir.base.byteCount = lendian16 (sizeof (moddir));
481 char *modname;
482 if (main_input_filename && *main_input_filename != '\0')
484 const char *part = strrchr (main_input_filename, '/');
485 if (!part)
486 part = main_input_filename;
487 else
488 part++;
489 modname = concat ("&__hsa_module_", part, NULL);
490 char *extension = strchr (modname, '.');
491 if (extension)
492 *extension = '\0';
494 /* As in LTO mode, we have to emit a different module names. */
495 if (flag_ltrans)
497 part = strrchr (asm_file_name, '/');
498 if (!part)
499 part = asm_file_name;
500 else
501 part++;
502 char *modname2;
503 modname2 = xasprintf ("%s_%s", modname, part);
504 free (modname);
505 modname = modname2;
508 hsa_sanitize_name (modname);
509 moddir.name = brig_emit_string (modname);
510 free (modname);
512 else
513 moddir.name = brig_emit_string ("__hsa_module_unnamed", '&');
514 moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE);
515 moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR);
516 moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR);
517 moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
518 if (hsa_machine_large_p ())
519 moddir.machineModel = BRIG_MACHINE_LARGE;
520 else
521 moddir.machineModel = BRIG_MACHINE_SMALL;
522 moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
523 brig_code.add (&moddir, sizeof (moddir));
526 /* Free all BRIG data. */
528 static void
529 brig_release_data (void)
531 delete brig_string_htab;
532 brig_data.release ();
533 brig_code.release ();
534 brig_operand.release ();
536 brig_initialized = 0;
539 /* Enqueue operation OP. Return the offset at which it will be stored. */
541 static unsigned int
542 enqueue_op (hsa_op_base *op)
544 unsigned ret;
546 if (op->m_brig_op_offset)
547 return op->m_brig_op_offset;
549 ret = op_queue.projected_size;
550 op->m_brig_op_offset = op_queue.projected_size;
552 if (!op_queue.first_op)
553 op_queue.first_op = op;
554 else
555 op_queue.last_op->m_next = op;
556 op_queue.last_op = op;
558 if (is_a <hsa_op_immed *> (op))
559 op_queue.projected_size += sizeof (struct BrigOperandConstantBytes);
560 else if (is_a <hsa_op_reg *> (op))
561 op_queue.projected_size += sizeof (struct BrigOperandRegister);
562 else if (is_a <hsa_op_address *> (op))
563 op_queue.projected_size += sizeof (struct BrigOperandAddress);
564 else if (is_a <hsa_op_code_ref *> (op))
565 op_queue.projected_size += sizeof (struct BrigOperandCodeRef);
566 else if (is_a <hsa_op_code_list *> (op))
567 op_queue.projected_size += sizeof (struct BrigOperandCodeList);
568 else if (is_a <hsa_op_operand_list *> (op))
569 op_queue.projected_size += sizeof (struct BrigOperandOperandList);
570 else
571 gcc_unreachable ();
572 return ret;
575 static void emit_immediate_operand (hsa_op_immed *imm);
577 /* Emit directive describing a symbol if it has not been emitted already.
578 Return the offset of the directive. */
580 static unsigned
581 emit_directive_variable (struct hsa_symbol *symbol)
583 struct BrigDirectiveVariable dirvar;
584 unsigned name_offset;
585 static unsigned res_name_offset;
587 if (symbol->m_directive_offset)
588 return symbol->m_directive_offset;
590 memset (&dirvar, 0, sizeof (dirvar));
591 dirvar.base.byteCount = lendian16 (sizeof (dirvar));
592 dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE);
593 dirvar.allocation = symbol->m_allocation;
595 char prefix = symbol->m_global_scope_p ? '&' : '%';
597 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL)
599 if (res_name_offset == 0)
600 res_name_offset = brig_emit_string (symbol->m_name, '%');
601 name_offset = res_name_offset;
603 else if (symbol->m_name)
604 name_offset = brig_emit_string (symbol->m_name, prefix);
605 else
607 char buf[64];
608 snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment),
609 symbol->m_name_number);
610 name_offset = brig_emit_string (buf, prefix);
613 dirvar.name = lendian32 (name_offset);
615 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == CONST_DECL)
617 hsa_op_immed *tmp = new hsa_op_immed (DECL_INITIAL (symbol->m_decl));
618 dirvar.init = lendian32 (enqueue_op (tmp));
620 else
621 dirvar.init = 0;
622 dirvar.type = lendian16 (symbol->m_type);
623 dirvar.segment = symbol->m_segment;
624 dirvar.align = symbol->m_align;
625 dirvar.linkage = symbol->m_linkage;
626 dirvar.dim.lo = symbol->m_dim;
627 dirvar.dim.hi = symbol->m_dim >> 32;
629 /* Global variables are just declared and linked via HSA runtime. */
630 if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM)
631 dirvar.modifier |= BRIG_VARIABLE_DEFINITION;
632 dirvar.reserved = 0;
634 if (symbol->m_cst_value)
636 dirvar.modifier |= BRIG_VARIABLE_CONST;
637 dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value));
640 symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar));
641 return symbol->m_directive_offset;
644 /* Emit directives describing either a function declaration or definition F and
645 return the produced BrigDirectiveExecutable structure. The function does
646 not take into account any instructions when calculating nextModuleEntry
647 field of the produced BrigDirectiveExecutable structure so when emitting
648 actual definitions, this field needs to be updated after all of the function
649 is actually added to the code section. */
651 static BrigDirectiveExecutable *
652 emit_function_directives (hsa_function_representation *f, bool is_declaration)
654 struct BrigDirectiveExecutable fndir;
655 unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
656 int count = 0;
657 void *ptr_to_fndir;
658 hsa_symbol *sym;
660 if (!f->m_declaration_p)
661 for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++)
663 gcc_assert (!sym->m_emitted_to_brig);
664 sym->m_emitted_to_brig = true;
665 emit_directive_variable (sym);
666 brig_insn_count++;
669 name_offset = brig_emit_string (f->m_name, '&');
670 inarg_off = brig_code.total_size + sizeof (fndir)
671 + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
672 scoped_off = inarg_off
673 + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable);
675 if (!f->m_declaration_p)
677 count += f->m_spill_symbols.length ();
678 count += f->m_private_variables.length ();
681 next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable);
683 memset (&fndir, 0, sizeof (fndir));
684 fndir.base.byteCount = lendian16 (sizeof (fndir));
685 fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
686 : BRIG_KIND_DIRECTIVE_FUNCTION);
687 fndir.name = lendian32 (name_offset);
688 fndir.inArgCount = lendian16 (f->m_input_args.length ());
689 fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0);
690 fndir.firstInArg = lendian32 (inarg_off);
691 fndir.firstCodeBlockEntry = lendian32 (scoped_off);
692 fndir.nextModuleEntry = lendian32 (next_toplev_off);
693 fndir.linkage = f->get_linkage ();
694 if (!f->m_declaration_p)
695 fndir.modifier |= BRIG_EXECUTABLE_DEFINITION;
696 memset (&fndir.reserved, 0, sizeof (fndir.reserved));
698 /* Once we put a definition of function_offsets, we should not overwrite
699 it with a declaration of the function. */
700 if (f->m_internal_fn == NULL)
702 if (!function_offsets->get (f->m_decl) || !is_declaration)
703 function_offsets->put (f->m_decl, brig_code.total_size);
705 else
707 /* Internal function. */
708 hsa_internal_fn **slot
709 = hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT);
710 hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn);
711 int_fn->m_offset = brig_code.total_size;
712 *slot = int_fn;
715 brig_code.add (&fndir, sizeof (fndir), &ptr_to_fndir);
717 if (f->m_output_arg)
718 emit_directive_variable (f->m_output_arg);
719 for (unsigned i = 0; i < f->m_input_args.length (); i++)
720 emit_directive_variable (f->m_input_args[i]);
722 if (!f->m_declaration_p)
724 for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++)
726 emit_directive_variable (sym);
727 brig_insn_count++;
729 for (unsigned i = 0; i < f->m_private_variables.length (); i++)
731 emit_directive_variable (f->m_private_variables[i]);
732 brig_insn_count++;
736 return (BrigDirectiveExecutable *) ptr_to_fndir;
739 /* Emit a label directive for the given HBB. We assume it is about to start on
740 the current offset in the code section. */
742 static void
743 emit_bb_label_directive (hsa_bb *hbb)
745 struct BrigDirectiveLabel lbldir;
747 lbldir.base.byteCount = lendian16 (sizeof (lbldir));
748 lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL);
749 char buf[32];
750 snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl),
751 hbb->m_index);
752 lbldir.name = lendian32 (brig_emit_string (buf, '@'));
754 hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir,
755 sizeof (lbldir));
756 brig_insn_count++;
759 /* Map a normal HSAIL type to the type of the equivalent BRIG operand
760 holding such, for constants and registers. */
762 static BrigType16_t
763 regtype_for_type (BrigType16_t t)
765 switch (t)
767 case BRIG_TYPE_B1:
768 return BRIG_TYPE_B1;
770 case BRIG_TYPE_U8:
771 case BRIG_TYPE_U16:
772 case BRIG_TYPE_U32:
773 case BRIG_TYPE_S8:
774 case BRIG_TYPE_S16:
775 case BRIG_TYPE_S32:
776 case BRIG_TYPE_B8:
777 case BRIG_TYPE_B16:
778 case BRIG_TYPE_B32:
779 case BRIG_TYPE_F16:
780 case BRIG_TYPE_F32:
781 case BRIG_TYPE_U8X4:
782 case BRIG_TYPE_U16X2:
783 case BRIG_TYPE_S8X4:
784 case BRIG_TYPE_S16X2:
785 case BRIG_TYPE_F16X2:
786 return BRIG_TYPE_B32;
788 case BRIG_TYPE_U64:
789 case BRIG_TYPE_S64:
790 case BRIG_TYPE_F64:
791 case BRIG_TYPE_B64:
792 case BRIG_TYPE_U8X8:
793 case BRIG_TYPE_U16X4:
794 case BRIG_TYPE_U32X2:
795 case BRIG_TYPE_S8X8:
796 case BRIG_TYPE_S16X4:
797 case BRIG_TYPE_S32X2:
798 case BRIG_TYPE_F16X4:
799 case BRIG_TYPE_F32X2:
800 return BRIG_TYPE_B64;
802 case BRIG_TYPE_B128:
803 case BRIG_TYPE_U8X16:
804 case BRIG_TYPE_U16X8:
805 case BRIG_TYPE_U32X4:
806 case BRIG_TYPE_U64X2:
807 case BRIG_TYPE_S8X16:
808 case BRIG_TYPE_S16X8:
809 case BRIG_TYPE_S32X4:
810 case BRIG_TYPE_S64X2:
811 case BRIG_TYPE_F16X8:
812 case BRIG_TYPE_F32X4:
813 case BRIG_TYPE_F64X2:
814 return BRIG_TYPE_B128;
816 default:
817 gcc_unreachable ();
821 /* Return the length of the BRIG type TYPE that is going to be streamed out as
822 an immediate constant (so it must not be B1). */
824 unsigned
825 hsa_get_imm_brig_type_len (BrigType16_t type)
827 BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
828 BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;
830 switch (pack_type)
832 case BRIG_TYPE_PACK_NONE:
833 break;
834 case BRIG_TYPE_PACK_32:
835 return 4;
836 case BRIG_TYPE_PACK_64:
837 return 8;
838 case BRIG_TYPE_PACK_128:
839 return 16;
840 default:
841 gcc_unreachable ();
844 switch (base_type)
846 case BRIG_TYPE_U8:
847 case BRIG_TYPE_S8:
848 case BRIG_TYPE_B8:
849 return 1;
850 case BRIG_TYPE_U16:
851 case BRIG_TYPE_S16:
852 case BRIG_TYPE_F16:
853 case BRIG_TYPE_B16:
854 return 2;
855 case BRIG_TYPE_U32:
856 case BRIG_TYPE_S32:
857 case BRIG_TYPE_F32:
858 case BRIG_TYPE_B32:
859 return 4;
860 case BRIG_TYPE_U64:
861 case BRIG_TYPE_S64:
862 case BRIG_TYPE_F64:
863 case BRIG_TYPE_B64:
864 return 8;
865 case BRIG_TYPE_B128:
866 return 16;
867 default:
868 gcc_unreachable ();
872 /* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
873 If NEED_LEN is not equal to zero, shrink or extend the value
874 to NEED_LEN bytes. Return how many bytes were written. */
876 static int
877 emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len)
879 union hsa_bytes bytes;
881 memset (&bytes, 0, sizeof (bytes));
882 tree type = TREE_TYPE (value);
883 gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);
885 unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
886 if (INTEGRAL_TYPE_P (type)
887 || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST))
888 switch (data_len)
890 case 1:
891 bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
892 break;
893 case 2:
894 bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
895 break;
896 case 4:
897 bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
898 break;
899 case 8:
900 bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value);
901 break;
902 default:
903 gcc_unreachable ();
905 else if (SCALAR_FLOAT_TYPE_P (type))
907 if (data_len == 2)
909 sorry ("Support for HSA does not implement immediate 16 bit FPU "
910 "operands");
911 return 2;
913 unsigned int_len = GET_MODE_SIZE (SCALAR_FLOAT_TYPE_MODE (type));
914 /* There are always 32 bits in each long, no matter the size of
915 the hosts long. */
916 long tmp[6];
918 real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));
920 if (int_len == 4)
921 bytes.b32 = (uint32_t) tmp[0];
922 else
924 bytes.b64 = (uint64_t)(uint32_t) tmp[1];
925 bytes.b64 <<= 32;
926 bytes.b64 |= (uint32_t) tmp[0];
929 else
930 gcc_unreachable ();
932 int len;
933 if (need_len == 0)
934 len = data_len;
935 else
936 len = need_len;
938 memcpy (data, &bytes, len);
939 return len;
942 char *
943 hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size)
945 char *brig_repr;
946 *brig_repr_size = hsa_get_imm_brig_type_len (m_type);
948 if (m_tree_value != NULL_TREE)
950 /* Update brig_repr_size for special tree values. */
951 if (TREE_CODE (m_tree_value) == STRING_CST)
952 *brig_repr_size = TREE_STRING_LENGTH (m_tree_value);
953 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
954 *brig_repr_size
955 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value)));
957 unsigned total_len = *brig_repr_size;
959 /* As we can have a constructor with fewer elements, fill the memory
960 with zeros. */
961 brig_repr = XCNEWVEC (char, total_len);
962 char *p = brig_repr;
964 if (TREE_CODE (m_tree_value) == VECTOR_CST)
966 int i, num = VECTOR_CST_NELTS (m_tree_value);
967 for (i = 0; i < num; i++)
969 tree v = VECTOR_CST_ELT (m_tree_value, i);
970 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
971 total_len -= actual;
972 p += actual;
974 /* Vectors should have the exact size. */
975 gcc_assert (total_len == 0);
977 else if (TREE_CODE (m_tree_value) == STRING_CST)
978 memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value),
979 TREE_STRING_LENGTH (m_tree_value));
980 else if (TREE_CODE (m_tree_value) == COMPLEX_CST)
982 gcc_assert (total_len % 2 == 0);
983 unsigned actual;
984 actual
985 = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p,
986 total_len / 2);
988 gcc_assert (actual == total_len / 2);
989 p += actual;
991 actual
992 = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p,
993 total_len / 2);
994 gcc_assert (actual == total_len / 2);
996 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
998 unsigned len = CONSTRUCTOR_NELTS (m_tree_value);
999 for (unsigned i = 0; i < len; i++)
1001 tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
1002 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
1003 total_len -= actual;
1004 p += actual;
1007 else
1008 emit_immediate_scalar_to_buffer (m_tree_value, p, total_len);
1010 else
1012 hsa_bytes bytes;
1014 switch (*brig_repr_size)
1016 case 1:
1017 bytes.b8 = (uint8_t) m_int_value;
1018 break;
1019 case 2:
1020 bytes.b16 = (uint16_t) m_int_value;
1021 break;
1022 case 4:
1023 bytes.b32 = (uint32_t) m_int_value;
1024 break;
1025 case 8:
1026 bytes.b64 = (uint64_t) m_int_value;
1027 break;
1028 default:
1029 gcc_unreachable ();
1032 brig_repr = XNEWVEC (char, *brig_repr_size);
1033 memcpy (brig_repr, &bytes, *brig_repr_size);
1036 return brig_repr;
1039 /* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might
1040 have been massaged to comply with various HSA/BRIG type requirements, so the
1041 only important aspect of that is the length (because HSAIL might expect
1042 smaller constants or become bit-data). The data should be represented
1043 according to what is in the tree representation. */
1045 static void
1046 emit_immediate_operand (hsa_op_immed *imm)
1048 unsigned brig_repr_size;
1049 char *brig_repr = imm->emit_to_buffer (&brig_repr_size);
1050 struct BrigOperandConstantBytes out;
1052 memset (&out, 0, sizeof (out));
1053 out.base.byteCount = lendian16 (sizeof (out));
1054 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
1055 uint32_t byteCount = lendian32 (brig_repr_size);
1056 out.type = lendian16 (imm->m_type);
1057 out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1058 brig_operand.add (&out, sizeof (out));
1059 brig_data.add (brig_repr, brig_repr_size);
1060 brig_data.round_size_up (4);
1062 free (brig_repr);
1065 /* Emit a register BRIG operand REG. */
1067 static void
1068 emit_register_operand (hsa_op_reg *reg)
1070 struct BrigOperandRegister out;
1072 out.base.byteCount = lendian16 (sizeof (out));
1073 out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER);
1074 out.regNum = lendian32 (reg->m_hard_num);
1076 switch (regtype_for_type (reg->m_type))
1078 case BRIG_TYPE_B32:
1079 out.regKind = BRIG_REGISTER_KIND_SINGLE;
1080 break;
1081 case BRIG_TYPE_B64:
1082 out.regKind = BRIG_REGISTER_KIND_DOUBLE;
1083 break;
1084 case BRIG_TYPE_B128:
1085 out.regKind = BRIG_REGISTER_KIND_QUAD;
1086 break;
1087 case BRIG_TYPE_B1:
1088 out.regKind = BRIG_REGISTER_KIND_CONTROL;
1089 break;
1090 default:
1091 gcc_unreachable ();
1094 brig_operand.add (&out, sizeof (out));
1097 /* Emit an address BRIG operand ADDR. */
1099 static void
1100 emit_address_operand (hsa_op_address *addr)
1102 struct BrigOperandAddress out;
1104 out.base.byteCount = lendian16 (sizeof (out));
1105 out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS);
1106 out.symbol = addr->m_symbol
1107 ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0;
1108 out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0;
1110 if (sizeof (addr->m_imm_offset) == 8)
1112 out.offset.lo = lendian32 (addr->m_imm_offset);
1113 out.offset.hi = lendian32 (addr->m_imm_offset >> 32);
1115 else
1117 gcc_assert (sizeof (addr->m_imm_offset) == 4);
1118 out.offset.lo = lendian32 (addr->m_imm_offset);
1119 out.offset.hi = 0;
1122 brig_operand.add (&out, sizeof (out));
1125 /* Emit a code reference operand REF. */
1127 static void
1128 emit_code_ref_operand (hsa_op_code_ref *ref)
1130 struct BrigOperandCodeRef out;
1132 out.base.byteCount = lendian16 (sizeof (out));
1133 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF);
1134 out.ref = lendian32 (ref->m_directive_offset);
1135 brig_operand.add (&out, sizeof (out));
1138 /* Emit a code list operand CODE_LIST. */
1140 static void
1141 emit_code_list_operand (hsa_op_code_list *code_list)
1143 struct BrigOperandCodeList out;
1144 unsigned args = code_list->m_offsets.length ();
1146 for (unsigned i = 0; i < args; i++)
1147 gcc_assert (code_list->m_offsets[i]);
1149 out.base.byteCount = lendian16 (sizeof (out));
1150 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST);
1152 uint32_t byteCount = lendian32 (4 * args);
1154 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1155 brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t));
1156 brig_data.round_size_up (4);
1157 brig_operand.add (&out, sizeof (out));
1160 /* Emit an operand list operand OPERAND_LIST. */
1162 static void
1163 emit_operand_list_operand (hsa_op_operand_list *operand_list)
1165 struct BrigOperandOperandList out;
1166 unsigned args = operand_list->m_offsets.length ();
1168 for (unsigned i = 0; i < args; i++)
1169 gcc_assert (operand_list->m_offsets[i]);
1171 out.base.byteCount = lendian16 (sizeof (out));
1172 out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST);
1174 uint32_t byteCount = lendian32 (4 * args);
1176 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1177 brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t));
1178 brig_data.round_size_up (4);
1179 brig_operand.add (&out, sizeof (out));
1182 /* Emit all operands queued for writing. */
1184 static void
1185 emit_queued_operands (void)
1187 for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next)
1189 gcc_assert (op->m_brig_op_offset == brig_operand.total_size);
1190 if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op))
1191 emit_immediate_operand (imm);
1192 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
1193 emit_register_operand (reg);
1194 else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op))
1195 emit_address_operand (addr);
1196 else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op))
1197 emit_code_ref_operand (ref);
1198 else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op))
1199 emit_code_list_operand (code_list);
1200 else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op))
1201 emit_operand_list_operand (l);
1202 else
1203 gcc_unreachable ();
1207 /* Emit directives describing the function that is used for
1208 a function declaration. */
1210 static BrigDirectiveExecutable *
1211 emit_function_declaration (tree decl)
1213 hsa_function_representation *f = hsa_generate_function_declaration (decl);
1215 BrigDirectiveExecutable *e = emit_function_directives (f, true);
1216 emit_queued_operands ();
1218 delete f;
1220 return e;
1223 /* Emit directives describing the function that is used for
1224 an internal function declaration. */
1226 static BrigDirectiveExecutable *
1227 emit_internal_fn_decl (hsa_internal_fn *fn)
1229 hsa_function_representation *f = hsa_generate_internal_fn_decl (fn);
1231 BrigDirectiveExecutable *e = emit_function_directives (f, true);
1232 emit_queued_operands ();
1234 delete f;
1236 return e;
1239 /* Enqueue all operands of INSN and return offset to BRIG data section
1240 to list of operand offsets. */
1242 static unsigned
1243 emit_insn_operands (hsa_insn_basic *insn)
1245 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1246 operand_offsets;
1248 unsigned l = insn->operand_count ();
1250 /* We have N operands so use 4 * N for the byte_count. */
1251 uint32_t byte_count = lendian32 (4 * l);
1252 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1253 if (l > 0)
1255 operand_offsets.safe_grow (l);
1256 for (unsigned i = 0; i < l; i++)
1257 operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i)));
1259 brig_data.add (operand_offsets.address (),
1260 l * sizeof (BrigOperandOffset32_t));
1262 brig_data.round_size_up (4);
1263 return offset;
1266 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
1267 to BRIG data section to list of operand offsets. */
1269 static unsigned
1270 emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL,
1271 hsa_op_base *op2 = NULL)
1273 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1274 operand_offsets;
1276 gcc_checking_assert (op0 != NULL);
1277 operand_offsets.safe_push (enqueue_op (op0));
1279 if (op1 != NULL)
1281 operand_offsets.safe_push (enqueue_op (op1));
1282 if (op2 != NULL)
1283 operand_offsets.safe_push (enqueue_op (op2));
1286 unsigned l = operand_offsets.length ();
1288 /* We have N operands so use 4 * N for the byte_count. */
1289 uint32_t byte_count = lendian32 (4 * l);
1291 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1292 brig_data.add (operand_offsets.address (),
1293 l * sizeof (BrigOperandOffset32_t));
1295 brig_data.round_size_up (4);
1297 return offset;
1300 /* Emit an HSA memory instruction and all necessary directives, schedule
1301 necessary operands for writing. */
1303 static void
1304 emit_memory_insn (hsa_insn_mem *mem)
1306 struct BrigInstMem repr;
1307 gcc_checking_assert (mem->operand_count () == 2);
1309 hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1));
1311 /* This is necessary because of the erroneous typedef of
1312 BrigMemoryModifier8_t which introduces padding which may then contain
1313 random stuff (which we do not want so that we can test things don't
1314 change). */
1315 memset (&repr, 0, sizeof (repr));
1316 repr.base.base.byteCount = lendian16 (sizeof (repr));
1317 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1318 repr.base.opcode = lendian16 (mem->m_opcode);
1319 repr.base.type = lendian16 (mem->m_type);
1320 repr.base.operands = lendian32 (emit_insn_operands (mem));
1322 if (addr->m_symbol)
1323 repr.segment = addr->m_symbol->m_segment;
1324 else
1325 repr.segment = BRIG_SEGMENT_FLAT;
1326 repr.modifier = 0;
1327 repr.equivClass = mem->m_equiv_class;
1328 repr.align = mem->m_align;
1329 if (mem->m_opcode == BRIG_OPCODE_LD)
1330 repr.width = BRIG_WIDTH_1;
1331 else
1332 repr.width = BRIG_WIDTH_NONE;
1333 memset (&repr.reserved, 0, sizeof (repr.reserved));
1334 brig_code.add (&repr, sizeof (repr));
1335 brig_insn_count++;
1338 /* Emit an HSA signal memory instruction and all necessary directives, schedule
1339 necessary operands for writing. */
1341 static void
1342 emit_signal_insn (hsa_insn_signal *mem)
1344 struct BrigInstSignal repr;
1346 memset (&repr, 0, sizeof (repr));
1347 repr.base.base.byteCount = lendian16 (sizeof (repr));
1348 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL);
1349 repr.base.opcode = lendian16 (mem->m_opcode);
1350 repr.base.type = lendian16 (mem->m_type);
1351 repr.base.operands = lendian32 (emit_insn_operands (mem));
1353 repr.memoryOrder = mem->m_memory_order;
1354 repr.signalOperation = mem->m_signalop;
1355 repr.signalType = hsa_machine_large_p () ? BRIG_TYPE_SIG64 : BRIG_TYPE_SIG32;
1357 brig_code.add (&repr, sizeof (repr));
1358 brig_insn_count++;
1361 /* Emit an HSA atomic memory instruction and all necessary directives, schedule
1362 necessary operands for writing. */
1364 static void
1365 emit_atomic_insn (hsa_insn_atomic *mem)
1367 struct BrigInstAtomic repr;
1369 /* Either operand[0] or operand[1] must be an address operand. */
1370 hsa_op_address *addr = NULL;
1371 if (is_a <hsa_op_address *> (mem->get_op (0)))
1372 addr = as_a <hsa_op_address *> (mem->get_op (0));
1373 else
1374 addr = as_a <hsa_op_address *> (mem->get_op (1));
1376 memset (&repr, 0, sizeof (repr));
1377 repr.base.base.byteCount = lendian16 (sizeof (repr));
1378 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC);
1379 repr.base.opcode = lendian16 (mem->m_opcode);
1380 repr.base.type = lendian16 (mem->m_type);
1381 repr.base.operands = lendian32 (emit_insn_operands (mem));
1383 if (addr->m_symbol)
1384 repr.segment = addr->m_symbol->m_segment;
1385 else
1386 repr.segment = BRIG_SEGMENT_FLAT;
1387 repr.memoryOrder = mem->m_memoryorder;
1388 repr.memoryScope = mem->m_memoryscope;
1389 repr.atomicOperation = mem->m_atomicop;
1391 brig_code.add (&repr, sizeof (repr));
1392 brig_insn_count++;
1395 /* Emit an HSA LDA instruction and all necessary directives, schedule
1396 necessary operands for writing. */
1398 static void
1399 emit_addr_insn (hsa_insn_basic *insn)
1401 struct BrigInstAddr repr;
1403 hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1));
1405 repr.base.base.byteCount = lendian16 (sizeof (repr));
1406 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR);
1407 repr.base.opcode = lendian16 (insn->m_opcode);
1408 repr.base.type = lendian16 (insn->m_type);
1409 repr.base.operands = lendian32 (emit_insn_operands (insn));
1411 if (addr->m_symbol)
1412 repr.segment = addr->m_symbol->m_segment;
1413 else
1414 repr.segment = BRIG_SEGMENT_FLAT;
1415 memset (&repr.reserved, 0, sizeof (repr.reserved));
1417 brig_code.add (&repr, sizeof (repr));
1418 brig_insn_count++;
1421 /* Emit an HSA segment conversion instruction and all necessary directives,
1422 schedule necessary operands for writing. */
1424 static void
1425 emit_segment_insn (hsa_insn_seg *seg)
1427 struct BrigInstSegCvt repr;
1429 repr.base.base.byteCount = lendian16 (sizeof (repr));
1430 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT);
1431 repr.base.opcode = lendian16 (seg->m_opcode);
1432 repr.base.type = lendian16 (seg->m_type);
1433 repr.base.operands = lendian32 (emit_insn_operands (seg));
1434 repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type);
1435 repr.segment = seg->m_segment;
1436 repr.modifier = 0;
1438 brig_code.add (&repr, sizeof (repr));
1440 brig_insn_count++;
1443 /* Emit an HSA alloca instruction and all necessary directives,
1444 schedule necessary operands for writing. */
1446 static void
1447 emit_alloca_insn (hsa_insn_alloca *alloca)
1449 struct BrigInstMem repr;
1450 gcc_checking_assert (alloca->operand_count () == 2);
1452 memset (&repr, 0, sizeof (repr));
1453 repr.base.base.byteCount = lendian16 (sizeof (repr));
1454 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1455 repr.base.opcode = lendian16 (alloca->m_opcode);
1456 repr.base.type = lendian16 (alloca->m_type);
1457 repr.base.operands = lendian32 (emit_insn_operands (alloca));
1458 repr.segment = BRIG_SEGMENT_PRIVATE;
1459 repr.modifier = 0;
1460 repr.equivClass = 0;
1461 repr.align = alloca->m_align;
1462 repr.width = BRIG_WIDTH_NONE;
1463 memset (&repr.reserved, 0, sizeof (repr.reserved));
1464 brig_code.add (&repr, sizeof (repr));
1465 brig_insn_count++;
1468 /* Emit an HSA comparison instruction and all necessary directives,
1469 schedule necessary operands for writing. */
1471 static void
1472 emit_cmp_insn (hsa_insn_cmp *cmp)
1474 struct BrigInstCmp repr;
1476 memset (&repr, 0, sizeof (repr));
1477 repr.base.base.byteCount = lendian16 (sizeof (repr));
1478 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP);
1479 repr.base.opcode = lendian16 (cmp->m_opcode);
1480 repr.base.type = lendian16 (cmp->m_type);
1481 repr.base.operands = lendian32 (emit_insn_operands (cmp));
1483 if (is_a <hsa_op_reg *> (cmp->get_op (1)))
1484 repr.sourceType
1485 = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type);
1486 else
1487 repr.sourceType
1488 = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type);
1489 repr.modifier = 0;
1490 repr.compare = cmp->m_compare;
1491 repr.pack = 0;
1493 brig_code.add (&repr, sizeof (repr));
1494 brig_insn_count++;
1497 /* Emit an HSA generic branching/sycnronization instruction. */
1499 static void
1500 emit_generic_branch_insn (hsa_insn_br *br)
1502 struct BrigInstBr repr;
1503 repr.base.base.byteCount = lendian16 (sizeof (repr));
1504 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1505 repr.base.opcode = lendian16 (br->m_opcode);
1506 repr.width = br->m_width;
1507 repr.base.type = lendian16 (br->m_type);
1508 repr.base.operands = lendian32 (emit_insn_operands (br));
1509 memset (&repr.reserved, 0, sizeof (repr.reserved));
1511 brig_code.add (&repr, sizeof (repr));
1512 brig_insn_count++;
1515 /* Emit an HSA conditional branching instruction and all necessary directives,
1516 schedule necessary operands for writing. */
1518 static void
1519 emit_cond_branch_insn (hsa_insn_cbr *br)
1521 struct BrigInstBr repr;
1523 basic_block target = NULL;
1524 edge_iterator ei;
1525 edge e;
1527 /* At the moment we only handle direct conditional jumps. */
1528 gcc_assert (br->m_opcode == BRIG_OPCODE_CBR);
1529 repr.base.base.byteCount = lendian16 (sizeof (repr));
1530 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1531 repr.base.opcode = lendian16 (br->m_opcode);
1532 repr.width = br->m_width;
1533 /* For Conditional jumps the type is always B1. */
1534 repr.base.type = lendian16 (BRIG_TYPE_B1);
1536 FOR_EACH_EDGE (e, ei, br->m_bb->succs)
1537 if (e->flags & EDGE_TRUE_VALUE)
1539 target = e->dest;
1540 break;
1542 gcc_assert (target);
1544 repr.base.operands
1545 = lendian32 (emit_operands (br->get_op (0),
1546 &hsa_bb_for_bb (target)->m_label_ref));
1547 memset (&repr.reserved, 0, sizeof (repr.reserved));
1549 brig_code.add (&repr, sizeof (repr));
1550 brig_insn_count++;
1553 /* Emit an HSA unconditional jump branching instruction that points to
1554 a label REFERENCE. */
1556 static void
1557 emit_unconditional_jump (hsa_op_code_ref *reference)
1559 struct BrigInstBr repr;
1561 repr.base.base.byteCount = lendian16 (sizeof (repr));
1562 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1563 repr.base.opcode = lendian16 (BRIG_OPCODE_BR);
1564 repr.base.type = lendian16 (BRIG_TYPE_NONE);
1565 /* Direct branches to labels must be width(all). */
1566 repr.width = BRIG_WIDTH_ALL;
1568 repr.base.operands = lendian32 (emit_operands (reference));
1569 memset (&repr.reserved, 0, sizeof (repr.reserved));
1570 brig_code.add (&repr, sizeof (repr));
1571 brig_insn_count++;
1574 /* Emit an HSA switch jump instruction that uses a jump table to
1575 jump to a destination label. */
1577 static void
1578 emit_switch_insn (hsa_insn_sbr *sbr)
1580 struct BrigInstBr repr;
1582 gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR);
1583 repr.base.base.byteCount = lendian16 (sizeof (repr));
1584 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1585 repr.base.opcode = lendian16 (sbr->m_opcode);
1586 repr.width = BRIG_WIDTH_1;
1587 /* For Conditional jumps the type is always B1. */
1588 hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0));
1589 repr.base.type = lendian16 (index->m_type);
1590 repr.base.operands
1591 = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list));
1592 memset (&repr.reserved, 0, sizeof (repr.reserved));
1594 brig_code.add (&repr, sizeof (repr));
1595 brig_insn_count++;
1598 /* Emit a HSA convert instruction and all necessary directives, schedule
1599 necessary operands for writing. */
1601 static void
1602 emit_cvt_insn (hsa_insn_cvt *insn)
1604 struct BrigInstCvt repr;
1605 BrigType16_t srctype;
1607 repr.base.base.byteCount = lendian16 (sizeof (repr));
1608 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT);
1609 repr.base.opcode = lendian16 (insn->m_opcode);
1610 repr.base.type = lendian16 (insn->m_type);
1611 repr.base.operands = lendian32 (emit_insn_operands (insn));
1613 if (is_a <hsa_op_reg *> (insn->get_op (1)))
1614 srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type;
1615 else
1616 srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type;
1617 repr.sourceType = lendian16 (srctype);
1618 repr.modifier = 0;
1619 /* float to smaller float requires a rounding setting (we default
1620 to 'near'. */
1621 if (hsa_type_float_p (insn->m_type)
1622 && (!hsa_type_float_p (srctype)
1623 || ((insn->m_type & BRIG_TYPE_BASE_MASK)
1624 < (srctype & BRIG_TYPE_BASE_MASK))))
1625 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1626 else if (hsa_type_integer_p (insn->m_type) &&
1627 hsa_type_float_p (srctype))
1628 repr.round = BRIG_ROUND_INTEGER_ZERO;
1629 else
1630 repr.round = BRIG_ROUND_NONE;
1631 brig_code.add (&repr, sizeof (repr));
1632 brig_insn_count++;
1635 /* Emit call instruction INSN, where this instruction must be closed
1636 within a call block instruction. */
1638 static void
1639 emit_call_insn (hsa_insn_call *call)
1641 struct BrigInstBr repr;
1643 repr.base.base.byteCount = lendian16 (sizeof (repr));
1644 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1645 repr.base.opcode = lendian16 (BRIG_OPCODE_CALL);
1646 repr.base.type = lendian16 (BRIG_TYPE_NONE);
1648 repr.base.operands
1649 = lendian32 (emit_operands (call->m_result_code_list, &call->m_func,
1650 call->m_args_code_list));
1652 /* Internal functions have not set m_called_function. */
1653 if (call->m_called_function)
1655 function_linkage_pair pair (call->m_called_function,
1656 call->m_func.m_brig_op_offset);
1657 function_call_linkage.safe_push (pair);
1659 else
1661 hsa_internal_fn *slot
1662 = hsa_emitted_internal_decls->find (call->m_called_internal_fn);
1663 gcc_assert (slot);
1664 gcc_assert (slot->m_offset > 0);
1665 call->m_func.m_directive_offset = slot->m_offset;
1668 repr.width = BRIG_WIDTH_ALL;
1669 memset (&repr.reserved, 0, sizeof (repr.reserved));
1671 brig_code.add (&repr, sizeof (repr));
1672 brig_insn_count++;
1675 /* Emit argument block directive. */
1677 static void
1678 emit_arg_block_insn (hsa_insn_arg_block *insn)
1680 switch (insn->m_kind)
1682 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
1684 struct BrigDirectiveArgBlock repr;
1685 repr.base.byteCount = lendian16 (sizeof (repr));
1686 repr.base.kind = lendian16 (insn->m_kind);
1687 brig_code.add (&repr, sizeof (repr));
1689 for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++)
1691 insn->m_call_insn->m_args_code_list->m_offsets[i]
1692 = lendian32 (emit_directive_variable
1693 (insn->m_call_insn->m_input_args[i]));
1694 brig_insn_count++;
1697 if (insn->m_call_insn->m_output_arg)
1699 insn->m_call_insn->m_result_code_list->m_offsets[0]
1700 = lendian32 (emit_directive_variable
1701 (insn->m_call_insn->m_output_arg));
1702 brig_insn_count++;
1705 break;
1707 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
1709 struct BrigDirectiveArgBlock repr;
1710 repr.base.byteCount = lendian16 (sizeof (repr));
1711 repr.base.kind = lendian16 (insn->m_kind);
1712 brig_code.add (&repr, sizeof (repr));
1713 break;
1715 default:
1716 gcc_unreachable ();
1719 brig_insn_count++;
1722 /* Emit comment directive. */
1724 static void
1725 emit_comment_insn (hsa_insn_comment *insn)
1727 struct BrigDirectiveComment repr;
1728 memset (&repr, 0, sizeof (repr));
1730 repr.base.byteCount = lendian16 (sizeof (repr));
1731 repr.base.kind = lendian16 (insn->m_opcode);
1732 repr.name = brig_emit_string (insn->m_comment, '\0', false);
1733 brig_code.add (&repr, sizeof (repr));
1736 /* Emit queue instruction INSN. */
1738 static void
1739 emit_queue_insn (hsa_insn_queue *insn)
1741 BrigInstQueue repr;
1742 memset (&repr, 0, sizeof (repr));
1744 repr.base.base.byteCount = lendian16 (sizeof (repr));
1745 repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE);
1746 repr.base.opcode = lendian16 (insn->m_opcode);
1747 repr.base.type = lendian16 (insn->m_type);
1748 repr.segment = insn->m_segment;
1749 repr.memoryOrder = insn->m_memory_order;
1750 repr.base.operands = lendian32 (emit_insn_operands (insn));
1751 brig_data.round_size_up (4);
1752 brig_code.add (&repr, sizeof (repr));
1754 brig_insn_count++;
1757 /* Emit source type instruction INSN. */
1759 static void
1760 emit_srctype_insn (hsa_insn_srctype *insn)
1762 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1763 struct BrigInstSourceType repr;
1764 unsigned operand_count = insn->operand_count ();
1765 gcc_checking_assert (operand_count >= 2);
1767 memset (&repr, 0, sizeof (repr));
1768 repr.sourceType = lendian16 (insn->m_source_type);
1769 repr.base.base.byteCount = lendian16 (sizeof (repr));
1770 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1771 repr.base.opcode = lendian16 (insn->m_opcode);
1772 repr.base.type = lendian16 (insn->m_type);
1774 repr.base.operands = lendian32 (emit_insn_operands (insn));
1775 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1776 brig_insn_count++;
1779 /* Emit packed instruction INSN. */
1781 static void
1782 emit_packed_insn (hsa_insn_packed *insn)
1784 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1785 struct BrigInstSourceType repr;
1786 unsigned operand_count = insn->operand_count ();
1787 gcc_checking_assert (operand_count >= 2);
1789 memset (&repr, 0, sizeof (repr));
1790 repr.sourceType = lendian16 (insn->m_source_type);
1791 repr.base.base.byteCount = lendian16 (sizeof (repr));
1792 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1793 repr.base.opcode = lendian16 (insn->m_opcode);
1794 repr.base.type = lendian16 (insn->m_type);
1796 if (insn->m_opcode == BRIG_OPCODE_COMBINE)
1798 /* Create operand list for packed type. */
1799 for (unsigned i = 1; i < operand_count; i++)
1801 gcc_checking_assert (insn->get_op (i));
1802 insn->m_operand_list->m_offsets[i - 1]
1803 = lendian32 (enqueue_op (insn->get_op (i)));
1806 repr.base.operands = lendian32 (emit_operands (insn->get_op (0),
1807 insn->m_operand_list));
1809 else if (insn->m_opcode == BRIG_OPCODE_EXPAND)
1811 /* Create operand list for packed type. */
1812 for (unsigned i = 0; i < operand_count - 1; i++)
1814 gcc_checking_assert (insn->get_op (i));
1815 insn->m_operand_list->m_offsets[i]
1816 = lendian32 (enqueue_op (insn->get_op (i)));
1819 unsigned ops = emit_operands (insn->m_operand_list,
1820 insn->get_op (insn->operand_count () - 1));
1821 repr.base.operands = lendian32 (ops);
1825 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1826 brig_insn_count++;
1829 /* Emit a basic HSA instruction and all necessary directives, schedule
1830 necessary operands for writing. */
1832 static void
1833 emit_basic_insn (hsa_insn_basic *insn)
1835 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1836 struct BrigInstMod repr;
1837 BrigType16_t type;
1839 memset (&repr, 0, sizeof (repr));
1840 repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic));
1841 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC);
1842 repr.base.opcode = lendian16 (insn->m_opcode);
1843 switch (insn->m_opcode)
1845 /* And the bit-logical operations need bit types and whine about
1846 arithmetic types :-/ */
1847 case BRIG_OPCODE_AND:
1848 case BRIG_OPCODE_OR:
1849 case BRIG_OPCODE_XOR:
1850 case BRIG_OPCODE_NOT:
1851 type = regtype_for_type (insn->m_type);
1852 break;
1853 default:
1854 type = insn->m_type;
1855 break;
1857 repr.base.type = lendian16 (type);
1858 repr.base.operands = lendian32 (emit_insn_operands (insn));
1860 if (hsa_type_packed_p (type))
1862 if (hsa_type_float_p (type)
1863 && !hsa_opcode_floating_bit_insn_p (insn->m_opcode))
1864 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1865 else
1866 repr.round = 0;
1867 /* We assume that destination and sources agree in packing layout. */
1868 if (insn->num_used_ops () >= 2)
1869 repr.pack = BRIG_PACK_PP;
1870 else
1871 repr.pack = BRIG_PACK_P;
1872 repr.reserved = 0;
1873 repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod));
1874 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD);
1875 brig_code.add (&repr, sizeof (struct BrigInstMod));
1877 else
1878 brig_code.add (&repr, sizeof (struct BrigInstBasic));
1879 brig_insn_count++;
1882 /* Emit an HSA instruction and all necessary directives, schedule necessary
1883 operands for writing. */
1885 static void
1886 emit_insn (hsa_insn_basic *insn)
1888 gcc_assert (!is_a <hsa_insn_phi *> (insn));
1890 insn->m_brig_offset = brig_code.total_size;
1892 if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn))
1893 emit_signal_insn (signal);
1894 else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn))
1895 emit_atomic_insn (atom);
1896 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
1897 emit_memory_insn (mem);
1898 else if (insn->m_opcode == BRIG_OPCODE_LDA)
1899 emit_addr_insn (insn);
1900 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
1901 emit_segment_insn (seg);
1902 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
1903 emit_cmp_insn (cmp);
1904 else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn))
1905 emit_cond_branch_insn (br);
1906 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
1908 if (switch_instructions == NULL)
1909 switch_instructions = new vec <hsa_insn_sbr *> ();
1911 switch_instructions->safe_push (sbr);
1912 emit_switch_insn (sbr);
1914 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
1915 emit_generic_branch_insn (br);
1916 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
1917 emit_arg_block_insn (block);
1918 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
1919 emit_call_insn (call);
1920 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
1921 emit_comment_insn (comment);
1922 else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn))
1923 emit_queue_insn (queue);
1924 else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn))
1925 emit_srctype_insn (srctype);
1926 else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn))
1927 emit_packed_insn (packed);
1928 else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn))
1929 emit_cvt_insn (cvt);
1930 else if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn))
1931 emit_alloca_insn (alloca);
1932 else
1933 emit_basic_insn (insn);
1936 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
1937 or we are about to finish emitting code, if it is NULL. If the fall through
1938 edge from BB does not lead to NEXT_BB, emit an unconditional jump. */
1940 static void
1941 perhaps_emit_branch (basic_block bb, basic_block next_bb)
1943 basic_block t_bb = NULL, ff = NULL;
1945 edge_iterator ei;
1946 edge e;
1948 /* If the last instruction of BB is a switch, ignore emission of all
1949 edges. */
1950 if (hsa_bb_for_bb (bb)->m_last_insn
1951 && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn))
1952 return;
1954 FOR_EACH_EDGE (e, ei, bb->succs)
1955 if (e->flags & EDGE_TRUE_VALUE)
1957 gcc_assert (!t_bb);
1958 t_bb = e->dest;
1960 else
1962 gcc_assert (!ff);
1963 ff = e->dest;
1966 if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun))
1967 return;
1969 emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref);
1972 /* Emit the a function with name NAME to the various brig sections. */
1974 void
1975 hsa_brig_emit_function (void)
1977 basic_block bb, prev_bb;
1978 hsa_insn_basic *insn;
1979 BrigDirectiveExecutable *ptr_to_fndir;
1981 brig_init ();
1983 brig_insn_count = 0;
1984 memset (&op_queue, 0, sizeof (op_queue));
1985 op_queue.projected_size = brig_operand.total_size;
1987 if (!function_offsets)
1988 function_offsets = new hash_map<tree, BrigCodeOffset32_t> ();
1990 if (!emitted_declarations)
1991 emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> ();
1993 for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++)
1995 tree called = hsa_cfun->m_called_functions[i];
1997 /* If the function has no definition, emit a declaration. */
1998 if (!emitted_declarations->get (called))
2000 BrigDirectiveExecutable *e = emit_function_declaration (called);
2001 emitted_declarations->put (called, e);
2005 for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++)
2007 hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i];
2008 emit_internal_fn_decl (called);
2011 ptr_to_fndir = emit_function_directives (hsa_cfun, false);
2012 for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn;
2013 insn;
2014 insn = insn->m_next)
2015 emit_insn (insn);
2016 prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2017 FOR_EACH_BB_FN (bb, cfun)
2019 perhaps_emit_branch (prev_bb, bb);
2020 emit_bb_label_directive (hsa_bb_for_bb (bb));
2021 for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next)
2022 emit_insn (insn);
2023 prev_bb = bb;
2025 perhaps_emit_branch (prev_bb, NULL);
2026 ptr_to_fndir->nextModuleEntry = lendian32 (brig_code.total_size);
2028 /* Fill up label references for all sbr instructions. */
2029 if (switch_instructions)
2031 for (unsigned i = 0; i < switch_instructions->length (); i++)
2033 hsa_insn_sbr *sbr = (*switch_instructions)[i];
2034 for (unsigned j = 0; j < sbr->m_jump_table.length (); j++)
2036 hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]);
2037 sbr->m_label_code_list->m_offsets[j]
2038 = hbb->m_label_ref.m_directive_offset;
2042 switch_instructions->release ();
2043 delete switch_instructions;
2044 switch_instructions = NULL;
2047 if (dump_file)
2049 fprintf (dump_file, "------- After BRIG emission: -------\n");
2050 dump_hsa_cfun (dump_file);
2053 emit_queued_operands ();
2056 /* Emit all OMP symbols related to OMP. */
2058 void
2059 hsa_brig_emit_omp_symbols (void)
2061 brig_init ();
2062 emit_directive_variable (hsa_num_threads);
2065 /* Create and return __hsa_global_variables symbol that contains
2066 all informations consumed by libgomp to link global variables
2067 with their string names used by an HSA kernel. */
2069 static tree
2070 hsa_output_global_variables ()
2072 unsigned l = hsa_global_variable_symbols->elements ();
2074 tree variable_info_type = make_node (RECORD_TYPE);
2075 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2076 get_identifier ("name"), ptr_type_node);
2077 DECL_CHAIN (id_f1) = NULL_TREE;
2078 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2079 get_identifier ("omp_data_size"),
2080 ptr_type_node);
2081 DECL_CHAIN (id_f2) = id_f1;
2082 finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2,
2083 NULL_TREE);
2085 tree int_num_of_global_vars;
2086 int_num_of_global_vars = build_int_cst (uint32_type_node, l);
2087 tree global_vars_num_index_type = build_index_type (int_num_of_global_vars);
2088 tree global_vars_array_type = build_array_type (variable_info_type,
2089 global_vars_num_index_type);
2090 TYPE_ARTIFICIAL (global_vars_array_type) = 1;
2092 vec<constructor_elt, va_gc> *global_vars_vec = NULL;
2094 for (hash_table <hsa_noop_symbol_hasher>::iterator it
2095 = hsa_global_variable_symbols->begin ();
2096 it != hsa_global_variable_symbols->end (); ++it)
2098 unsigned len = strlen ((*it)->m_name);
2099 char *copy = XNEWVEC (char, len + 2);
2100 copy[0] = '&';
2101 memcpy (copy + 1, (*it)->m_name, len);
2102 copy[len + 1] = '\0';
2103 len++;
2104 hsa_sanitize_name (copy);
2106 tree var_name = build_string (len, copy);
2107 TREE_TYPE (var_name)
2108 = build_array_type (char_type_node, build_index_type (size_int (len)));
2109 free (copy);
2111 vec<constructor_elt, va_gc> *variable_info_vec = NULL;
2112 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2113 build1 (ADDR_EXPR,
2114 build_pointer_type (TREE_TYPE (var_name)),
2115 var_name));
2116 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2117 build_fold_addr_expr ((*it)->m_decl));
2119 tree variable_info_ctor = build_constructor (variable_info_type,
2120 variable_info_vec);
2122 CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE,
2123 variable_info_ctor);
2126 tree global_vars_ctor = build_constructor (global_vars_array_type,
2127 global_vars_vec);
2129 char tmp_name[64];
2130 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1);
2131 tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2132 get_identifier (tmp_name),
2133 global_vars_array_type);
2134 TREE_STATIC (global_vars_table) = 1;
2135 TREE_READONLY (global_vars_table) = 1;
2136 TREE_PUBLIC (global_vars_table) = 0;
2137 DECL_ARTIFICIAL (global_vars_table) = 1;
2138 DECL_IGNORED_P (global_vars_table) = 1;
2139 DECL_EXTERNAL (global_vars_table) = 0;
2140 TREE_CONSTANT (global_vars_table) = 1;
2141 DECL_INITIAL (global_vars_table) = global_vars_ctor;
2142 varpool_node::finalize_decl (global_vars_table);
2144 return global_vars_table;
2147 /* Create __hsa_host_functions and __hsa_kernels that contain
2148 all the information consumed by libgomp to register all kernels
2149 in the BRIG binary. */
2151 static void
2152 hsa_output_kernels (tree *host_func_table, tree *kernels)
2154 unsigned map_count = hsa_get_number_decl_kernel_mappings ();
2156 tree int_num_of_kernels;
2157 int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
2158 tree kernel_num_index_type = build_index_type (int_num_of_kernels);
2159 tree host_functions_array_type = build_array_type (ptr_type_node,
2160 kernel_num_index_type);
2161 TYPE_ARTIFICIAL (host_functions_array_type) = 1;
2163 vec<constructor_elt, va_gc> *host_functions_vec = NULL;
2164 for (unsigned i = 0; i < map_count; ++i)
2166 tree decl = hsa_get_decl_kernel_mapping_decl (i);
2167 tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl));
2168 CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
2170 tree host_functions_ctor = build_constructor (host_functions_array_type,
2171 host_functions_vec);
2172 char tmp_name[64];
2173 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
2174 tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2175 get_identifier (tmp_name),
2176 host_functions_array_type);
2177 TREE_STATIC (hsa_host_func_table) = 1;
2178 TREE_READONLY (hsa_host_func_table) = 1;
2179 TREE_PUBLIC (hsa_host_func_table) = 0;
2180 DECL_ARTIFICIAL (hsa_host_func_table) = 1;
2181 DECL_IGNORED_P (hsa_host_func_table) = 1;
2182 DECL_EXTERNAL (hsa_host_func_table) = 0;
2183 TREE_CONSTANT (hsa_host_func_table) = 1;
2184 DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
2185 varpool_node::finalize_decl (hsa_host_func_table);
2186 *host_func_table = hsa_host_func_table;
2188 /* Following code emits list of kernel_info structures. */
2190 tree kernel_info_type = make_node (RECORD_TYPE);
2191 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2192 get_identifier ("name"), ptr_type_node);
2193 DECL_CHAIN (id_f1) = NULL_TREE;
2194 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2195 get_identifier ("omp_data_size"),
2196 unsigned_type_node);
2197 DECL_CHAIN (id_f2) = id_f1;
2198 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2199 get_identifier ("gridified_kernel_p"),
2200 boolean_type_node);
2201 DECL_CHAIN (id_f3) = id_f2;
2202 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2203 get_identifier ("kernel_dependencies_count"),
2204 unsigned_type_node);
2205 DECL_CHAIN (id_f4) = id_f3;
2206 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2207 get_identifier ("kernel_dependencies"),
2208 build_pointer_type (build_pointer_type
2209 (char_type_node)));
2210 DECL_CHAIN (id_f5) = id_f4;
2211 finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5,
2212 NULL_TREE);
2214 int_num_of_kernels = build_int_cstu (uint32_type_node, map_count);
2215 tree kernel_info_vector_type
2216 = build_array_type (kernel_info_type,
2217 build_index_type (int_num_of_kernels));
2218 TYPE_ARTIFICIAL (kernel_info_vector_type) = 1;
2220 vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL;
2221 tree kernel_dependencies_vector_type = NULL;
2223 for (unsigned i = 0; i < map_count; ++i)
2225 tree kernel = hsa_get_decl_kernel_mapping_decl (i);
2226 char *name = hsa_get_decl_kernel_mapping_name (i);
2227 unsigned len = strlen (name);
2228 char *copy = XNEWVEC (char, len + 2);
2229 copy[0] = '&';
2230 memcpy (copy + 1, name, len);
2231 copy[len + 1] = '\0';
2232 len++;
2234 tree kern_name = build_string (len, copy);
2235 TREE_TYPE (kern_name)
2236 = build_array_type (char_type_node, build_index_type (size_int (len)));
2237 free (copy);
2239 unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i);
2240 tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size);
2241 bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i);
2242 tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
2243 gridified_kernel_p);
2244 unsigned count = 0;
2245 vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
2246 if (hsa_decl_kernel_dependencies)
2248 vec<const char *> **slot;
2249 slot = hsa_decl_kernel_dependencies->get (kernel);
2250 if (slot)
2252 vec <const char *> *dependencies = *slot;
2253 count = dependencies->length ();
2255 kernel_dependencies_vector_type
2256 = build_array_type (build_pointer_type (char_type_node),
2257 build_index_type (size_int (count)));
2258 TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1;
2260 for (unsigned j = 0; j < count; j++)
2262 const char *d = (*dependencies)[j];
2263 len = strlen (d);
2264 tree dependency_name = build_string (len, d);
2265 TREE_TYPE (dependency_name)
2266 = build_array_type (char_type_node,
2267 build_index_type (size_int (len)));
2269 CONSTRUCTOR_APPEND_ELT
2270 (kernel_dependencies_vec, NULL_TREE,
2271 build1 (ADDR_EXPR,
2272 build_pointer_type (TREE_TYPE (dependency_name)),
2273 dependency_name));
2278 tree dependencies_count = build_int_cstu (unsigned_type_node, count);
2280 vec<constructor_elt, va_gc> *kernel_info_vec = NULL;
2281 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2282 build1 (ADDR_EXPR,
2283 build_pointer_type (TREE_TYPE
2284 (kern_name)),
2285 kern_name));
2286 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size);
2287 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2288 gridified_kernel_p_tree);
2289 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count);
2291 if (count > 0)
2293 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
2294 gcc_checking_assert (kernel_dependencies_vector_type);
2295 tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2296 get_identifier (tmp_name),
2297 kernel_dependencies_vector_type);
2299 TREE_STATIC (dependencies_list) = 1;
2300 TREE_READONLY (dependencies_list) = 1;
2301 TREE_PUBLIC (dependencies_list) = 0;
2302 DECL_ARTIFICIAL (dependencies_list) = 1;
2303 DECL_IGNORED_P (dependencies_list) = 1;
2304 DECL_EXTERNAL (dependencies_list) = 0;
2305 TREE_CONSTANT (dependencies_list) = 1;
2306 DECL_INITIAL (dependencies_list)
2307 = build_constructor (kernel_dependencies_vector_type,
2308 kernel_dependencies_vec);
2309 varpool_node::finalize_decl (dependencies_list);
2311 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2312 build1 (ADDR_EXPR,
2313 build_pointer_type
2314 (TREE_TYPE (dependencies_list)),
2315 dependencies_list));
2317 else
2318 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node);
2320 tree kernel_info_ctor = build_constructor (kernel_info_type,
2321 kernel_info_vec);
2323 CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE,
2324 kernel_info_ctor);
2327 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1);
2328 tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2329 get_identifier (tmp_name),
2330 kernel_info_vector_type);
2332 TREE_STATIC (hsa_kernels) = 1;
2333 TREE_READONLY (hsa_kernels) = 1;
2334 TREE_PUBLIC (hsa_kernels) = 0;
2335 DECL_ARTIFICIAL (hsa_kernels) = 1;
2336 DECL_IGNORED_P (hsa_kernels) = 1;
2337 DECL_EXTERNAL (hsa_kernels) = 0;
2338 TREE_CONSTANT (hsa_kernels) = 1;
2339 DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type,
2340 kernel_info_vector_vec);
2341 varpool_node::finalize_decl (hsa_kernels);
2342 *kernels = hsa_kernels;
2345 /* Create a static constructor that will register out brig stuff with
2346 libgomp. */
2348 static void
2349 hsa_output_libgomp_mapping (tree brig_decl)
2351 unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
2352 unsigned global_variable_count = hsa_global_variable_symbols->elements ();
2354 tree kernels;
2355 tree host_func_table;
2357 hsa_output_kernels (&host_func_table, &kernels);
2358 tree global_vars = hsa_output_global_variables ();
2360 tree hsa_image_desc_type = make_node (RECORD_TYPE);
2361 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2362 get_identifier ("brig_module"), ptr_type_node);
2363 DECL_CHAIN (id_f1) = NULL_TREE;
2364 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2365 get_identifier ("kernel_count"),
2366 unsigned_type_node);
2368 DECL_CHAIN (id_f2) = id_f1;
2369 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2370 get_identifier ("hsa_kernel_infos"),
2371 ptr_type_node);
2372 DECL_CHAIN (id_f3) = id_f2;
2373 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2374 get_identifier ("global_variable_count"),
2375 unsigned_type_node);
2376 DECL_CHAIN (id_f4) = id_f3;
2377 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2378 get_identifier ("hsa_global_variable_infos"),
2379 ptr_type_node);
2380 DECL_CHAIN (id_f5) = id_f4;
2381 finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5,
2382 NULL_TREE);
2383 TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;
2385 vec<constructor_elt, va_gc> *img_desc_vec = NULL;
2386 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2387 build_fold_addr_expr (brig_decl));
2388 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2389 build_int_cstu (unsigned_type_node, kernel_count));
2390 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2391 build1 (ADDR_EXPR,
2392 build_pointer_type (TREE_TYPE (kernels)),
2393 kernels));
2394 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2395 build_int_cstu (unsigned_type_node,
2396 global_variable_count));
2397 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2398 build1 (ADDR_EXPR,
2399 build_pointer_type (TREE_TYPE (global_vars)),
2400 global_vars));
2402 tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
2404 char tmp_name[64];
2405 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1);
2406 tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2407 get_identifier (tmp_name),
2408 hsa_image_desc_type);
2409 TREE_STATIC (hsa_img_descriptor) = 1;
2410 TREE_READONLY (hsa_img_descriptor) = 1;
2411 TREE_PUBLIC (hsa_img_descriptor) = 0;
2412 DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
2413 DECL_IGNORED_P (hsa_img_descriptor) = 1;
2414 DECL_EXTERNAL (hsa_img_descriptor) = 0;
2415 TREE_CONSTANT (hsa_img_descriptor) = 1;
2416 DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
2417 varpool_node::finalize_decl (hsa_img_descriptor);
2419 /* Construct the "host_table" libgomp expects. */
2420 tree index_type = build_index_type (build_int_cst (integer_type_node, 4));
2421 tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type);
2422 TYPE_ARTIFICIAL (libgomp_host_table_type) = 1;
2423 vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
2424 tree host_func_table_addr = build_fold_addr_expr (host_func_table);
2425 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2426 host_func_table_addr);
2427 offset_int func_table_size
2428 = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
2429 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2430 fold_build2 (POINTER_PLUS_EXPR,
2431 TREE_TYPE (host_func_table_addr),
2432 host_func_table_addr,
2433 build_int_cst (size_type_node,
2434 func_table_size.to_uhwi
2435 ())));
2436 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2437 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2438 tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
2439 libgomp_host_table_vec);
2440 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1);
2441 tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2442 get_identifier (tmp_name),
2443 libgomp_host_table_type);
2445 TREE_STATIC (hsa_libgomp_host_table) = 1;
2446 TREE_READONLY (hsa_libgomp_host_table) = 1;
2447 TREE_PUBLIC (hsa_libgomp_host_table) = 0;
2448 DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
2449 DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
2450 DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
2451 TREE_CONSTANT (hsa_libgomp_host_table) = 1;
2452 DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
2453 varpool_node::finalize_decl (hsa_libgomp_host_table);
2455 /* Generate an initializer with a call to the registration routine. */
2457 tree offload_register
2458 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER);
2459 gcc_checking_assert (offload_register);
2461 tree *hsa_ctor_stmts = hsa_get_ctor_statements ();
2462 append_to_statement_list
2463 (build_call_expr (offload_register, 4,
2464 build_int_cstu (unsigned_type_node,
2465 GOMP_VERSION_PACK (GOMP_VERSION,
2466 GOMP_VERSION_HSA)),
2467 build_fold_addr_expr (hsa_libgomp_host_table),
2468 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2469 build_fold_addr_expr (hsa_img_descriptor)),
2470 hsa_ctor_stmts);
2472 cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY);
2474 tree offload_unregister
2475 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER);
2476 gcc_checking_assert (offload_unregister);
2478 tree *hsa_dtor_stmts = hsa_get_dtor_statements ();
2479 append_to_statement_list
2480 (build_call_expr (offload_unregister, 4,
2481 build_int_cstu (unsigned_type_node,
2482 GOMP_VERSION_PACK (GOMP_VERSION,
2483 GOMP_VERSION_HSA)),
2484 build_fold_addr_expr (hsa_libgomp_host_table),
2485 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2486 build_fold_addr_expr (hsa_img_descriptor)),
2487 hsa_dtor_stmts);
2488 cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, DEFAULT_INIT_PRIORITY);
2491 /* Emit the brig module we have compiled to a section in the final assembly and
2492 also create a compile unit static constructor that will register the brig
2493 module with libgomp. */
2495 void
2496 hsa_output_brig (void)
2498 section *saved_section;
2500 if (!brig_initialized)
2501 return;
2503 for (unsigned i = 0; i < function_call_linkage.length (); i++)
2505 function_linkage_pair p = function_call_linkage[i];
2507 BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl);
2508 gcc_assert (*func_offset);
2509 BrigOperandCodeRef *code_ref
2510 = (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset));
2511 gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF);
2512 code_ref->ref = lendian32 (*func_offset);
2515 /* Iterate all function declarations and if we meet a function that should
2516 have module linkage and we are unable to emit HSAIL for the function,
2517 then change the linkage to program linkage. Doing so, we will emit
2518 a valid BRIG image. */
2519 if (hsa_failed_functions != NULL && emitted_declarations != NULL)
2520 for (hash_map <tree, BrigDirectiveExecutable *>::iterator it
2521 = emitted_declarations->begin ();
2522 it != emitted_declarations->end ();
2523 ++it)
2525 if (hsa_failed_functions->contains ((*it).first))
2526 (*it).second->linkage = BRIG_LINKAGE_PROGRAM;
2529 saved_section = in_section;
2531 switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
2532 char tmp_name[64];
2533 ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
2534 ASM_OUTPUT_LABEL (asm_out_file, tmp_name);
2535 tree brig_id = get_identifier (tmp_name);
2536 tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
2537 char_type_node);
2538 SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
2539 TREE_ADDRESSABLE (brig_decl) = 1;
2540 TREE_READONLY (brig_decl) = 1;
2541 DECL_ARTIFICIAL (brig_decl) = 1;
2542 DECL_IGNORED_P (brig_decl) = 1;
2543 TREE_STATIC (brig_decl) = 1;
2544 TREE_PUBLIC (brig_decl) = 0;
2545 TREE_USED (brig_decl) = 1;
2546 DECL_INITIAL (brig_decl) = brig_decl;
2547 TREE_ASM_WRITTEN (brig_decl) = 1;
2549 BrigModuleHeader module_header;
2550 memcpy (&module_header.identification, "HSA BRIG",
2551 sizeof (module_header.identification));
2552 module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR);
2553 module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR);
2554 uint64_t section_index[3];
2556 int data_padding, code_padding, operand_padding;
2557 data_padding = HSA_SECTION_ALIGNMENT
2558 - brig_data.total_size % HSA_SECTION_ALIGNMENT;
2559 code_padding = HSA_SECTION_ALIGNMENT
2560 - brig_code.total_size % HSA_SECTION_ALIGNMENT;
2561 operand_padding = HSA_SECTION_ALIGNMENT
2562 - brig_operand.total_size % HSA_SECTION_ALIGNMENT;
2564 uint64_t module_size = sizeof (module_header)
2565 + sizeof (section_index)
2566 + brig_data.total_size
2567 + data_padding
2568 + brig_code.total_size
2569 + code_padding
2570 + brig_operand.total_size
2571 + operand_padding;
2572 gcc_assert ((module_size % 16) == 0);
2573 module_header.byteCount = lendian64 (module_size);
2574 memset (&module_header.hash, 0, sizeof (module_header.hash));
2575 module_header.reserved = 0;
2576 module_header.sectionCount = lendian32 (3);
2577 module_header.sectionIndex = lendian64 (sizeof (module_header));
2578 assemble_string ((const char *) &module_header, sizeof (module_header));
2579 uint64_t off = sizeof (module_header) + sizeof (section_index);
2580 section_index[0] = lendian64 (off);
2581 off += brig_data.total_size + data_padding;
2582 section_index[1] = lendian64 (off);
2583 off += brig_code.total_size + code_padding;
2584 section_index[2] = lendian64 (off);
2585 assemble_string ((const char *) &section_index, sizeof (section_index));
2587 char padding[HSA_SECTION_ALIGNMENT];
2588 memset (padding, 0, sizeof (padding));
2590 brig_data.output ();
2591 assemble_string (padding, data_padding);
2592 brig_code.output ();
2593 assemble_string (padding, code_padding);
2594 brig_operand.output ();
2595 assemble_string (padding, operand_padding);
2597 if (saved_section)
2598 switch_to_section (saved_section);
2600 hsa_output_libgomp_mapping (brig_decl);
2602 hsa_free_decl_kernel_mapping ();
2603 brig_release_data ();
2604 hsa_deinit_compilation_unit_data ();
2606 delete emitted_declarations;
2607 emitted_declarations = NULL;
2608 delete function_offsets;
2609 function_offsets = NULL;