2018-03-27 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / hsa-brig.c
blobd3efff40453f690c7a7515905177b706466429cb
1 /* Producing binary form of HSA BRIG from our internal representation.
2 Copyright (C) 2013-2018 Free Software Foundation, Inc.
3 Contributed by Martin Jambor <mjambor@suse.cz> and
4 Martin Liska <mliska@suse.cz>.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "target.h"
27 #include "memmodel.h"
28 #include "tm_p.h"
29 #include "is-a.h"
30 #include "vec.h"
31 #include "hash-table.h"
32 #include "hash-map.h"
33 #include "tree.h"
34 #include "tree-iterator.h"
35 #include "stor-layout.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "cfg.h"
39 #include "function.h"
40 #include "fold-const.h"
41 #include "stringpool.h"
42 #include "gimple-pretty-print.h"
43 #include "diagnostic-core.h"
44 #include "cgraph.h"
45 #include "dumpfile.h"
46 #include "print-tree.h"
47 #include "symbol-summary.h"
48 #include "hsa-common.h"
49 #include "gomp-constants.h"
/* Return the 16-bit value VAL in little-endian byte order.  The value is
   handed back untouched unless the build configuration says the host is
   big endian.  */

static uint16_t
lendian16 (uint16_t val)
{
  uint16_t le = val;

#if GCC_VERSION >= 4008
  /* The compiler predefines the byte-order macros and offers the builtin.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Host order already matches, nothing to do.  */
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  le = __builtin_bswap16 (val);
# endif
  /* Any remaining order (__ORDER_PDP_ENDIAN__) keeps the value as is.  */
#elif defined (WORDS_BIGENDIAN)
  /* Safe, slower default: exchange the two bytes with shifts.  */
  le = (uint16_t) ((val >> 8) | (val << 8));
#endif

  return le;
}
/* Return the 32-bit value VAL in little-endian byte order.  The value is
   handed back untouched when the build configuration says the host is
   little endian.  */

static uint32_t
lendian32 (uint32_t val)
{
  uint32_t le = val;

#if GCC_VERSION >= 4006
  /* The compiler predefines the byte-order macros and offers the builtin.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Host order already matches, nothing to do.  */
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  le = __builtin_bswap32 (val);
# else /* __ORDER_PDP_ENDIAN__ */
  /* Exchange the two 16-bit halves.  */
  le = (val >> 16) | (val << 16);
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Safe, slower default: swap bytes within each half, then the halves.  */
  le = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8);
  le = (le >> 16) | (le << 16);
#endif

  return le;
}
/* Return the 64-bit value VAL in little-endian byte order.  The value is
   handed back untouched when the build configuration says the host is
   little endian.  */

static uint64_t
lendian64 (uint64_t val)
{
  uint64_t le = val;

#if GCC_VERSION >= 4006
  /* The compiler predefines the byte-order macros and offers the builtin.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Host order already matches, nothing to do.  */
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  le = __builtin_bswap64 (val);
# else /* __ORDER_PDP_ENDIAN__ */
  /* Reverse the order of the four 16-bit words.  */
  le = (((val & 0xffffll) << 48)
        | ((val & 0xffff0000ll) << 16)
        | ((val & 0xffff00000000ll) >> 16)
        | ((val & 0xffff000000000000ll) >> 48));
# endif
#elif defined (WORDS_BIGENDIAN)
  /* Safe, slower default: swap bytes, then 16-bit words, then halves.  */
  le = (((val & 0xff00ff00ff00ff00ll) >> 8)
        | ((val & 0x00ff00ff00ff00ffll) << 8));
  le = (((le & 0xffff0000ffff0000ll) >> 16)
        | ((le & 0x0000ffff0000ffffll) << 16));
  le = (le >> 32) | (le << 32);
#endif

  return le;
}
/* Names used when producing BRIG: the ELF section, the label and the three
   BRIG sections (the latter are passed to hsa_brig_section::init).  */
#define BRIG_ELF_SECTION_NAME      ".brig"
#define BRIG_LABEL_STRING          "hsa_brig"
#define BRIG_SECTION_DATA_NAME     "hsa_data"
#define BRIG_SECTION_CODE_NAME     "hsa_code"
#define BRIG_SECTION_OPERAND_NAME  "hsa_operand"

/* Size in bytes of every buffer allocated for a section data chunk.  */
#define BRIG_CHUNK_MAX_SIZE (64 * 1024)

/* Required HSA section alignment.  */
#define HSA_SECTION_ALIGNMENT 16
/* One buffer of BRIG binary data belonging to a section.  */

struct hsa_brig_data_chunk
{
  /* Number of bytes of DATA that are already in use.  */
  unsigned size;

  /* The buffer itself.  */
  char *data;
};
151 /* Structure representing a BRIG section, holding and writing its data. */
153 class hsa_brig_section
155 public:
156 /* Section name that will be output to the BRIG. */
157 const char *section_name;
158 /* Size in bytes of all data stored in the section. */
159 unsigned total_size;
160 /* The size of the header of the section including padding. */
161 unsigned header_byte_count;
162 /* The size of the header of the section without any padding. */
163 unsigned header_byte_delta;
165 void init (const char *name);
166 void release ();
167 void output ();
168 unsigned add (const void *data, unsigned len, void **output = NULL);
169 void round_size_up (int factor);
170 void *get_ptr_by_offset (unsigned int offset);
172 private:
173 void allocate_new_chunk ();
175 /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */
176 vec <struct hsa_brig_data_chunk> chunks;
178 /* More convenient access to the last chunk from the vector above. */
179 struct hsa_brig_data_chunk *cur_chunk;
182 static struct hsa_brig_section brig_data, brig_code, brig_operand;
183 static uint32_t brig_insn_count;
184 static bool brig_initialized = false;
186 /* Mapping between emitted HSA functions and their offset in code segment. */
187 static hash_map<tree, BrigCodeOffset32_t> *function_offsets;
189 /* Hash map of emitted function declarations. */
190 static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations;
192 /* Hash table of emitted internal function declaration offsets. */
193 hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls;
195 /* List of sbr instructions. */
196 static vec <hsa_insn_sbr *> *switch_instructions;
198 struct function_linkage_pair
200 function_linkage_pair (tree decl, unsigned int off)
201 : function_decl (decl), offset (off) {}
203 /* Declaration of called function. */
204 tree function_decl;
206 /* Offset in operand section. */
207 unsigned int offset;
210 /* Vector of function calls where we need to resolve function offsets. */
211 static auto_vec <function_linkage_pair> function_call_linkage;
213 /* Add a new chunk, allocate data for it and initialize it. */
215 void
216 hsa_brig_section::allocate_new_chunk ()
218 struct hsa_brig_data_chunk new_chunk;
220 new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE);
221 new_chunk.size = 0;
222 cur_chunk = chunks.safe_push (new_chunk);
225 /* Initialize the brig section. */
227 void
228 hsa_brig_section::init (const char *name)
230 section_name = name;
231 /* While the following computation is basically wrong, because the intent
232 certainly wasn't to have the first character of name and padding, which
233 are a part of sizeof (BrigSectionHeader), included in the first addend,
234 this is what the disassembler expects. */
235 total_size = sizeof (BrigSectionHeader) + strlen (section_name);
236 chunks.create (1);
237 allocate_new_chunk ();
238 header_byte_delta = total_size;
239 round_size_up (4);
240 header_byte_count = total_size;
243 /* Free all data in the section. */
245 void
246 hsa_brig_section::release ()
248 for (unsigned i = 0; i < chunks.length (); i++)
249 free (chunks[i].data);
250 chunks.release ();
251 cur_chunk = NULL;
254 /* Write the section to the output file to a section with the name given at
255 initialization. Switches the output section and does not restore it. */
257 void
258 hsa_brig_section::output ()
260 struct BrigSectionHeader section_header;
261 char padding[8];
263 section_header.byteCount = lendian64 (total_size);
264 section_header.headerByteCount = lendian32 (header_byte_count);
265 section_header.nameLength = lendian32 (strlen (section_name));
266 assemble_string ((const char *) &section_header, 16);
267 assemble_string (section_name, (section_header.nameLength));
268 memset (&padding, 0, sizeof (padding));
269 /* This is also a consequence of the wrong header size computation described
270 in a comment in hsa_brig_section::init. */
271 assemble_string (padding, 8);
272 for (unsigned i = 0; i < chunks.length (); i++)
273 assemble_string (chunks[i].data, chunks[i].size);
276 /* Add to the stream LEN bytes of opaque binary DATA. Return the offset at
277 which it was stored. If OUTPUT is not NULL, store into it the pointer to
278 the place where DATA was actually stored. */
280 unsigned
281 hsa_brig_section::add (const void *data, unsigned len, void **output)
283 unsigned offset = total_size;
285 gcc_assert (len <= BRIG_CHUNK_MAX_SIZE);
286 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len))
287 allocate_new_chunk ();
289 char *dst = cur_chunk->data + cur_chunk->size;
290 memcpy (dst, data, len);
291 if (output)
292 *output = dst;
293 cur_chunk->size += len;
294 total_size += len;
296 return offset;
299 /* Add padding to section so that its size is divisible by FACTOR. */
301 void
302 hsa_brig_section::round_size_up (int factor)
304 unsigned padding, res = total_size % factor;
306 if (res == 0)
307 return;
309 padding = factor - res;
310 total_size += padding;
311 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding))
313 padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size;
314 cur_chunk->size = BRIG_CHUNK_MAX_SIZE;
315 allocate_new_chunk ();
318 cur_chunk->size += padding;
321 /* Return pointer to data by global OFFSET in the section. */
323 void *
324 hsa_brig_section::get_ptr_by_offset (unsigned int offset)
326 gcc_assert (offset < total_size);
327 offset -= header_byte_delta;
329 unsigned i;
330 for (i = 0; offset >= chunks[i].size; i++)
331 offset -= chunks[i].size;
333 return chunks[i].data + offset;
/* Entry of the hash table of strings emitted to the BRIG data section.  */

struct brig_string_slot
{
  /* The characters of the string; LEN says how many there are.  */
  const char *s;
  /* Prefix character of the string, zero when there is none.  */
  char prefix;
  /* Number of characters in S.  */
  int len;
  /* Offset of the emitted string in the data section.  */
  uint32_t offset;
};
346 /* Hash table helpers. */
348 struct brig_string_slot_hasher : pointer_hash <brig_string_slot>
350 static inline hashval_t hash (const value_type);
351 static inline bool equal (const value_type, const compare_type);
352 static inline void remove (value_type);
355 /* Returns a hash code for DS. Adapted from libiberty's htab_hash_string
356 to support strings that may not end in '\0'. */
358 inline hashval_t
359 brig_string_slot_hasher::hash (const value_type ds)
361 hashval_t r = ds->len;
362 int i;
364 for (i = 0; i < ds->len; i++)
365 r = r * 67 + (unsigned) ds->s[i] - 113;
366 r = r * 67 + (unsigned) ds->prefix - 113;
367 return r;
370 /* Returns nonzero if DS1 and DS2 are equal. */
372 inline bool
373 brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2)
375 if (ds1->len == ds2->len)
376 return ds1->prefix == ds2->prefix
377 && memcmp (ds1->s, ds2->s, ds1->len) == 0;
379 return 0;
382 /* Deallocate memory for DS upon its removal. */
384 inline void
385 brig_string_slot_hasher::remove (value_type ds)
387 free (const_cast<char *> (ds->s));
388 free (ds);
391 /* Hash for strings we output in order not to duplicate them needlessly. */
393 static hash_table<brig_string_slot_hasher> *brig_string_htab;
395 /* Emit a null terminated string STR to the data section and return its
396 offset in it. If PREFIX is non-zero, output it just before STR too.
397 Sanitize the string if SANITIZE option is set to true. */
399 static unsigned
400 brig_emit_string (const char *str, char prefix = 0, bool sanitize = true)
402 unsigned slen = strlen (str);
403 unsigned offset, len = slen + (prefix ? 1 : 0);
404 uint32_t hdr_len = lendian32 (len);
405 brig_string_slot s_slot;
406 brig_string_slot **slot;
407 char *str2;
409 str2 = xstrdup (str);
411 if (sanitize)
412 hsa_sanitize_name (str2);
413 s_slot.s = str2;
414 s_slot.len = slen;
415 s_slot.prefix = prefix;
416 s_slot.offset = 0;
418 slot = brig_string_htab->find_slot (&s_slot, INSERT);
419 if (*slot == NULL)
421 brig_string_slot *new_slot = XCNEW (brig_string_slot);
423 /* In theory we should fill in BrigData but that would mean copying
424 the string to a buffer for no reason, so we just emulate it. */
425 offset = brig_data.add (&hdr_len, sizeof (hdr_len));
426 if (prefix)
427 brig_data.add (&prefix, 1);
429 brig_data.add (str2, slen);
430 brig_data.round_size_up (4);
432 /* TODO: could use the string we just copied into
433 brig_string->cur_chunk */
434 new_slot->s = str2;
435 new_slot->len = slen;
436 new_slot->prefix = prefix;
437 new_slot->offset = offset;
438 *slot = new_slot;
440 else
442 offset = (*slot)->offset;
443 free (str2);
446 return offset;
449 /* Linked list of queued operands. */
451 static struct operand_queue
453 /* First from the chain of queued operands. */
454 hsa_op_base *first_op, *last_op;
456 /* The offset at which the next operand will be enqueued. */
457 unsigned projected_size;
459 } op_queue;
461 /* Unless already initialized, initialize infrastructure to produce BRIG. */
463 static void
464 brig_init (void)
466 brig_insn_count = 0;
468 if (brig_initialized)
469 return;
471 brig_string_htab = new hash_table<brig_string_slot_hasher> (37);
472 brig_data.init (BRIG_SECTION_DATA_NAME);
473 brig_code.init (BRIG_SECTION_CODE_NAME);
474 brig_operand.init (BRIG_SECTION_OPERAND_NAME);
475 brig_initialized = true;
477 struct BrigDirectiveModule moddir;
478 memset (&moddir, 0, sizeof (moddir));
479 moddir.base.byteCount = lendian16 (sizeof (moddir));
481 char *modname;
482 if (main_input_filename && *main_input_filename != '\0')
484 const char *part = strrchr (main_input_filename, '/');
485 if (!part)
486 part = main_input_filename;
487 else
488 part++;
489 modname = concat ("&__hsa_module_", part, NULL);
490 char *extension = strchr (modname, '.');
491 if (extension)
492 *extension = '\0';
494 /* As in LTO mode, we have to emit a different module names. */
495 if (flag_ltrans)
497 part = strrchr (asm_file_name, '/');
498 if (!part)
499 part = asm_file_name;
500 else
501 part++;
502 char *modname2;
503 modname2 = xasprintf ("%s_%s", modname, part);
504 free (modname);
505 modname = modname2;
508 hsa_sanitize_name (modname);
509 moddir.name = brig_emit_string (modname);
510 free (modname);
512 else
513 moddir.name = brig_emit_string ("__hsa_module_unnamed", '&');
514 moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE);
515 moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR);
516 moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR);
517 moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
518 if (hsa_machine_large_p ())
519 moddir.machineModel = BRIG_MACHINE_LARGE;
520 else
521 moddir.machineModel = BRIG_MACHINE_SMALL;
522 moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
523 brig_code.add (&moddir, sizeof (moddir));
526 /* Free all BRIG data. */
528 static void
529 brig_release_data (void)
531 delete brig_string_htab;
532 brig_data.release ();
533 brig_code.release ();
534 brig_operand.release ();
536 brig_initialized = 0;
539 /* Enqueue operation OP. Return the offset at which it will be stored. */
541 static unsigned int
542 enqueue_op (hsa_op_base *op)
544 unsigned ret;
546 if (op->m_brig_op_offset)
547 return op->m_brig_op_offset;
549 ret = op_queue.projected_size;
550 op->m_brig_op_offset = op_queue.projected_size;
552 if (!op_queue.first_op)
553 op_queue.first_op = op;
554 else
555 op_queue.last_op->m_next = op;
556 op_queue.last_op = op;
558 if (is_a <hsa_op_immed *> (op))
559 op_queue.projected_size += sizeof (struct BrigOperandConstantBytes);
560 else if (is_a <hsa_op_reg *> (op))
561 op_queue.projected_size += sizeof (struct BrigOperandRegister);
562 else if (is_a <hsa_op_address *> (op))
563 op_queue.projected_size += sizeof (struct BrigOperandAddress);
564 else if (is_a <hsa_op_code_ref *> (op))
565 op_queue.projected_size += sizeof (struct BrigOperandCodeRef);
566 else if (is_a <hsa_op_code_list *> (op))
567 op_queue.projected_size += sizeof (struct BrigOperandCodeList);
568 else if (is_a <hsa_op_operand_list *> (op))
569 op_queue.projected_size += sizeof (struct BrigOperandOperandList);
570 else
571 gcc_unreachable ();
572 return ret;
575 static void emit_immediate_operand (hsa_op_immed *imm);
577 /* Emit directive describing a symbol if it has not been emitted already.
578 Return the offset of the directive. */
580 static unsigned
581 emit_directive_variable (struct hsa_symbol *symbol)
583 struct BrigDirectiveVariable dirvar;
584 unsigned name_offset;
585 static unsigned res_name_offset;
587 if (symbol->m_directive_offset)
588 return symbol->m_directive_offset;
590 memset (&dirvar, 0, sizeof (dirvar));
591 dirvar.base.byteCount = lendian16 (sizeof (dirvar));
592 dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE);
593 dirvar.allocation = symbol->m_allocation;
595 char prefix = symbol->m_global_scope_p ? '&' : '%';
597 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL)
599 if (res_name_offset == 0)
600 res_name_offset = brig_emit_string (symbol->m_name, '%');
601 name_offset = res_name_offset;
603 else if (symbol->m_name)
604 name_offset = brig_emit_string (symbol->m_name, prefix);
605 else
607 char buf[64];
608 snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment),
609 symbol->m_name_number);
610 name_offset = brig_emit_string (buf, prefix);
613 dirvar.name = lendian32 (name_offset);
615 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == CONST_DECL)
617 hsa_op_immed *tmp = new hsa_op_immed (DECL_INITIAL (symbol->m_decl));
618 dirvar.init = lendian32 (enqueue_op (tmp));
620 else
621 dirvar.init = 0;
622 dirvar.type = lendian16 (symbol->m_type);
623 dirvar.segment = symbol->m_segment;
624 dirvar.align = symbol->m_align;
625 dirvar.linkage = symbol->m_linkage;
626 dirvar.dim.lo = symbol->m_dim;
627 dirvar.dim.hi = symbol->m_dim >> 32;
629 /* Global variables are just declared and linked via HSA runtime. */
630 if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM)
631 dirvar.modifier |= BRIG_VARIABLE_DEFINITION;
632 dirvar.reserved = 0;
634 if (symbol->m_cst_value)
636 dirvar.modifier |= BRIG_VARIABLE_CONST;
637 dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value));
640 symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar));
641 return symbol->m_directive_offset;
644 /* Emit directives describing either a function declaration or definition F and
645 return the produced BrigDirectiveExecutable structure. The function does
646 not take into account any instructions when calculating nextModuleEntry
647 field of the produced BrigDirectiveExecutable structure so when emitting
648 actual definitions, this field needs to be updated after all of the function
649 is actually added to the code section. */
651 static BrigDirectiveExecutable *
652 emit_function_directives (hsa_function_representation *f, bool is_declaration)
654 struct BrigDirectiveExecutable fndir;
655 unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
656 int count = 0;
657 void *ptr_to_fndir;
658 hsa_symbol *sym;
660 if (!f->m_declaration_p)
661 for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++)
663 gcc_assert (!sym->m_emitted_to_brig);
664 sym->m_emitted_to_brig = true;
665 emit_directive_variable (sym);
666 brig_insn_count++;
669 name_offset = brig_emit_string (f->m_name, '&');
670 inarg_off = brig_code.total_size + sizeof (fndir)
671 + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
672 scoped_off = inarg_off
673 + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable);
675 if (!f->m_declaration_p)
677 count += f->m_spill_symbols.length ();
678 count += f->m_private_variables.length ();
681 next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable);
683 memset (&fndir, 0, sizeof (fndir));
684 fndir.base.byteCount = lendian16 (sizeof (fndir));
685 fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
686 : BRIG_KIND_DIRECTIVE_FUNCTION);
687 fndir.name = lendian32 (name_offset);
688 fndir.inArgCount = lendian16 (f->m_input_args.length ());
689 fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0);
690 fndir.firstInArg = lendian32 (inarg_off);
691 fndir.firstCodeBlockEntry = lendian32 (scoped_off);
692 fndir.nextModuleEntry = lendian32 (next_toplev_off);
693 fndir.linkage = f->get_linkage ();
694 if (!f->m_declaration_p)
695 fndir.modifier |= BRIG_EXECUTABLE_DEFINITION;
696 memset (&fndir.reserved, 0, sizeof (fndir.reserved));
698 /* Once we put a definition of function_offsets, we should not overwrite
699 it with a declaration of the function. */
700 if (f->m_internal_fn == NULL)
702 if (!function_offsets->get (f->m_decl) || !is_declaration)
703 function_offsets->put (f->m_decl, brig_code.total_size);
705 else
707 /* Internal function. */
708 hsa_internal_fn **slot
709 = hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT);
710 hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn);
711 int_fn->m_offset = brig_code.total_size;
712 *slot = int_fn;
715 brig_code.add (&fndir, sizeof (fndir), &ptr_to_fndir);
717 if (f->m_output_arg)
718 emit_directive_variable (f->m_output_arg);
719 for (unsigned i = 0; i < f->m_input_args.length (); i++)
720 emit_directive_variable (f->m_input_args[i]);
722 if (!f->m_declaration_p)
724 for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++)
726 emit_directive_variable (sym);
727 brig_insn_count++;
729 for (unsigned i = 0; i < f->m_private_variables.length (); i++)
731 emit_directive_variable (f->m_private_variables[i]);
732 brig_insn_count++;
736 return (BrigDirectiveExecutable *) ptr_to_fndir;
739 /* Emit a label directive for the given HBB. We assume it is about to start on
740 the current offset in the code section. */
742 static void
743 emit_bb_label_directive (hsa_bb *hbb)
745 struct BrigDirectiveLabel lbldir;
747 lbldir.base.byteCount = lendian16 (sizeof (lbldir));
748 lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL);
749 char buf[32];
750 snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl),
751 hbb->m_index);
752 lbldir.name = lendian32 (brig_emit_string (buf, '@'));
754 hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir,
755 sizeof (lbldir));
756 brig_insn_count++;
759 /* Map a normal HSAIL type to the type of the equivalent BRIG operand
760 holding such, for constants and registers. */
762 static BrigType16_t
763 regtype_for_type (BrigType16_t t)
765 switch (t)
767 case BRIG_TYPE_B1:
768 return BRIG_TYPE_B1;
770 case BRIG_TYPE_U8:
771 case BRIG_TYPE_U16:
772 case BRIG_TYPE_U32:
773 case BRIG_TYPE_S8:
774 case BRIG_TYPE_S16:
775 case BRIG_TYPE_S32:
776 case BRIG_TYPE_B8:
777 case BRIG_TYPE_B16:
778 case BRIG_TYPE_B32:
779 case BRIG_TYPE_F16:
780 case BRIG_TYPE_F32:
781 case BRIG_TYPE_U8X4:
782 case BRIG_TYPE_U16X2:
783 case BRIG_TYPE_S8X4:
784 case BRIG_TYPE_S16X2:
785 case BRIG_TYPE_F16X2:
786 return BRIG_TYPE_B32;
788 case BRIG_TYPE_U64:
789 case BRIG_TYPE_S64:
790 case BRIG_TYPE_F64:
791 case BRIG_TYPE_B64:
792 case BRIG_TYPE_U8X8:
793 case BRIG_TYPE_U16X4:
794 case BRIG_TYPE_U32X2:
795 case BRIG_TYPE_S8X8:
796 case BRIG_TYPE_S16X4:
797 case BRIG_TYPE_S32X2:
798 case BRIG_TYPE_F16X4:
799 case BRIG_TYPE_F32X2:
800 return BRIG_TYPE_B64;
802 case BRIG_TYPE_B128:
803 case BRIG_TYPE_U8X16:
804 case BRIG_TYPE_U16X8:
805 case BRIG_TYPE_U32X4:
806 case BRIG_TYPE_U64X2:
807 case BRIG_TYPE_S8X16:
808 case BRIG_TYPE_S16X8:
809 case BRIG_TYPE_S32X4:
810 case BRIG_TYPE_S64X2:
811 case BRIG_TYPE_F16X8:
812 case BRIG_TYPE_F32X4:
813 case BRIG_TYPE_F64X2:
814 return BRIG_TYPE_B128;
816 default:
817 gcc_unreachable ();
821 /* Return the length of the BRIG type TYPE that is going to be streamed out as
822 an immediate constant (so it must not be B1). */
824 unsigned
825 hsa_get_imm_brig_type_len (BrigType16_t type)
827 BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
828 BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;
830 switch (pack_type)
832 case BRIG_TYPE_PACK_NONE:
833 break;
834 case BRIG_TYPE_PACK_32:
835 return 4;
836 case BRIG_TYPE_PACK_64:
837 return 8;
838 case BRIG_TYPE_PACK_128:
839 return 16;
840 default:
841 gcc_unreachable ();
844 switch (base_type)
846 case BRIG_TYPE_U8:
847 case BRIG_TYPE_S8:
848 case BRIG_TYPE_B8:
849 return 1;
850 case BRIG_TYPE_U16:
851 case BRIG_TYPE_S16:
852 case BRIG_TYPE_F16:
853 case BRIG_TYPE_B16:
854 return 2;
855 case BRIG_TYPE_U32:
856 case BRIG_TYPE_S32:
857 case BRIG_TYPE_F32:
858 case BRIG_TYPE_B32:
859 return 4;
860 case BRIG_TYPE_U64:
861 case BRIG_TYPE_S64:
862 case BRIG_TYPE_F64:
863 case BRIG_TYPE_B64:
864 return 8;
865 case BRIG_TYPE_B128:
866 return 16;
867 default:
868 gcc_unreachable ();
872 /* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
873 If NEED_LEN is not equal to zero, shrink or extend the value
874 to NEED_LEN bytes. Return how many bytes were written. */
876 static int
877 emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len)
879 union hsa_bytes bytes;
881 memset (&bytes, 0, sizeof (bytes));
882 tree type = TREE_TYPE (value);
883 gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);
885 unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
886 if (INTEGRAL_TYPE_P (type)
887 || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST))
888 switch (data_len)
890 case 1:
891 bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
892 break;
893 case 2:
894 bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
895 break;
896 case 4:
897 bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
898 break;
899 case 8:
900 bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value);
901 break;
902 default:
903 gcc_unreachable ();
905 else if (SCALAR_FLOAT_TYPE_P (type))
907 if (data_len == 2)
909 sorry ("Support for HSA does not implement immediate 16 bit FPU "
910 "operands");
911 return 2;
913 unsigned int_len = GET_MODE_SIZE (SCALAR_FLOAT_TYPE_MODE (type));
914 /* There are always 32 bits in each long, no matter the size of
915 the hosts long. */
916 long tmp[6];
918 real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));
920 if (int_len == 4)
921 bytes.b32 = (uint32_t) tmp[0];
922 else
924 bytes.b64 = (uint64_t)(uint32_t) tmp[1];
925 bytes.b64 <<= 32;
926 bytes.b64 |= (uint32_t) tmp[0];
929 else
930 gcc_unreachable ();
932 int len;
933 if (need_len == 0)
934 len = data_len;
935 else
936 len = need_len;
938 memcpy (data, &bytes, len);
939 return len;
942 char *
943 hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size)
945 char *brig_repr;
946 *brig_repr_size = hsa_get_imm_brig_type_len (m_type);
948 if (m_tree_value != NULL_TREE)
950 /* Update brig_repr_size for special tree values. */
951 if (TREE_CODE (m_tree_value) == STRING_CST)
952 *brig_repr_size = TREE_STRING_LENGTH (m_tree_value);
953 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
954 *brig_repr_size
955 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value)));
957 unsigned total_len = *brig_repr_size;
959 /* As we can have a constructor with fewer elements, fill the memory
960 with zeros. */
961 brig_repr = XCNEWVEC (char, total_len);
962 char *p = brig_repr;
964 if (TREE_CODE (m_tree_value) == VECTOR_CST)
966 /* Variable-length vectors aren't supported. */
967 int i, num = VECTOR_CST_NELTS (m_tree_value).to_constant ();
968 for (i = 0; i < num; i++)
970 tree v = VECTOR_CST_ELT (m_tree_value, i);
971 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
972 total_len -= actual;
973 p += actual;
975 /* Vectors should have the exact size. */
976 gcc_assert (total_len == 0);
978 else if (TREE_CODE (m_tree_value) == STRING_CST)
979 memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value),
980 TREE_STRING_LENGTH (m_tree_value));
981 else if (TREE_CODE (m_tree_value) == COMPLEX_CST)
983 gcc_assert (total_len % 2 == 0);
984 unsigned actual;
985 actual
986 = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p,
987 total_len / 2);
989 gcc_assert (actual == total_len / 2);
990 p += actual;
992 actual
993 = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p,
994 total_len / 2);
995 gcc_assert (actual == total_len / 2);
997 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
999 unsigned len = CONSTRUCTOR_NELTS (m_tree_value);
1000 for (unsigned i = 0; i < len; i++)
1002 tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
1003 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
1004 total_len -= actual;
1005 p += actual;
1008 else
1009 emit_immediate_scalar_to_buffer (m_tree_value, p, total_len);
1011 else
1013 hsa_bytes bytes;
1015 switch (*brig_repr_size)
1017 case 1:
1018 bytes.b8 = (uint8_t) m_int_value;
1019 break;
1020 case 2:
1021 bytes.b16 = (uint16_t) m_int_value;
1022 break;
1023 case 4:
1024 bytes.b32 = (uint32_t) m_int_value;
1025 break;
1026 case 8:
1027 bytes.b64 = (uint64_t) m_int_value;
1028 break;
1029 default:
1030 gcc_unreachable ();
1033 brig_repr = XNEWVEC (char, *brig_repr_size);
1034 memcpy (brig_repr, &bytes, *brig_repr_size);
1037 return brig_repr;
1040 /* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might
1041 have been massaged to comply with various HSA/BRIG type requirements, so the
1042 only important aspect of that is the length (because HSAIL might expect
1043 smaller constants or become bit-data). The data should be represented
1044 according to what is in the tree representation. */
1046 static void
1047 emit_immediate_operand (hsa_op_immed *imm)
1049 unsigned brig_repr_size;
1050 char *brig_repr = imm->emit_to_buffer (&brig_repr_size);
1051 struct BrigOperandConstantBytes out;
1053 memset (&out, 0, sizeof (out));
1054 out.base.byteCount = lendian16 (sizeof (out));
1055 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
1056 uint32_t byteCount = lendian32 (brig_repr_size);
1057 out.type = lendian16 (imm->m_type);
1058 out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1059 brig_operand.add (&out, sizeof (out));
1060 brig_data.add (brig_repr, brig_repr_size);
1061 brig_data.round_size_up (4);
1063 free (brig_repr);
1066 /* Emit a register BRIG operand REG. */
1068 static void
1069 emit_register_operand (hsa_op_reg *reg)
1071 struct BrigOperandRegister out;
1073 out.base.byteCount = lendian16 (sizeof (out));
1074 out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER);
1075 out.regNum = lendian32 (reg->m_hard_num);
1077 switch (regtype_for_type (reg->m_type))
1079 case BRIG_TYPE_B32:
1080 out.regKind = BRIG_REGISTER_KIND_SINGLE;
1081 break;
1082 case BRIG_TYPE_B64:
1083 out.regKind = BRIG_REGISTER_KIND_DOUBLE;
1084 break;
1085 case BRIG_TYPE_B128:
1086 out.regKind = BRIG_REGISTER_KIND_QUAD;
1087 break;
1088 case BRIG_TYPE_B1:
1089 out.regKind = BRIG_REGISTER_KIND_CONTROL;
1090 break;
1091 default:
1092 gcc_unreachable ();
1095 brig_operand.add (&out, sizeof (out));
1098 /* Emit an address BRIG operand ADDR. */
1100 static void
1101 emit_address_operand (hsa_op_address *addr)
1103 struct BrigOperandAddress out;
1105 out.base.byteCount = lendian16 (sizeof (out));
1106 out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS);
1107 out.symbol = addr->m_symbol
1108 ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0;
1109 out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0;
1111 if (sizeof (addr->m_imm_offset) == 8)
1113 out.offset.lo = lendian32 (addr->m_imm_offset);
1114 out.offset.hi = lendian32 (addr->m_imm_offset >> 32);
1116 else
1118 gcc_assert (sizeof (addr->m_imm_offset) == 4);
1119 out.offset.lo = lendian32 (addr->m_imm_offset);
1120 out.offset.hi = 0;
1123 brig_operand.add (&out, sizeof (out));
1126 /* Emit a code reference operand REF. */
1128 static void
1129 emit_code_ref_operand (hsa_op_code_ref *ref)
1131 struct BrigOperandCodeRef out;
1133 out.base.byteCount = lendian16 (sizeof (out));
1134 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF);
1135 out.ref = lendian32 (ref->m_directive_offset);
1136 brig_operand.add (&out, sizeof (out));
1139 /* Emit a code list operand CODE_LIST. */
1141 static void
1142 emit_code_list_operand (hsa_op_code_list *code_list)
1144 struct BrigOperandCodeList out;
1145 unsigned args = code_list->m_offsets.length ();
1147 for (unsigned i = 0; i < args; i++)
1148 gcc_assert (code_list->m_offsets[i]);
1150 out.base.byteCount = lendian16 (sizeof (out));
1151 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST);
1153 uint32_t byteCount = lendian32 (4 * args);
1155 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1156 brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t));
1157 brig_data.round_size_up (4);
1158 brig_operand.add (&out, sizeof (out));
1161 /* Emit an operand list operand OPERAND_LIST. */
1163 static void
1164 emit_operand_list_operand (hsa_op_operand_list *operand_list)
1166 struct BrigOperandOperandList out;
1167 unsigned args = operand_list->m_offsets.length ();
1169 for (unsigned i = 0; i < args; i++)
1170 gcc_assert (operand_list->m_offsets[i]);
1172 out.base.byteCount = lendian16 (sizeof (out));
1173 out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST);
1175 uint32_t byteCount = lendian32 (4 * args);
1177 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1178 brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t));
1179 brig_data.round_size_up (4);
1180 brig_operand.add (&out, sizeof (out));
1183 /* Emit all operands queued for writing. */
1185 static void
1186 emit_queued_operands (void)
1188 for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next)
1190 gcc_assert (op->m_brig_op_offset == brig_operand.total_size);
1191 if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op))
1192 emit_immediate_operand (imm);
1193 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
1194 emit_register_operand (reg);
1195 else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op))
1196 emit_address_operand (addr);
1197 else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op))
1198 emit_code_ref_operand (ref);
1199 else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op))
1200 emit_code_list_operand (code_list);
1201 else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op))
1202 emit_operand_list_operand (l);
1203 else
1204 gcc_unreachable ();
1208 /* Emit directives describing the function that is used for
1209 a function declaration. */
1211 static BrigDirectiveExecutable *
1212 emit_function_declaration (tree decl)
1214 hsa_function_representation *f = hsa_generate_function_declaration (decl);
1216 BrigDirectiveExecutable *e = emit_function_directives (f, true);
1217 emit_queued_operands ();
1219 delete f;
1221 return e;
1224 /* Emit directives describing the function that is used for
1225 an internal function declaration. */
1227 static BrigDirectiveExecutable *
1228 emit_internal_fn_decl (hsa_internal_fn *fn)
1230 hsa_function_representation *f = hsa_generate_internal_fn_decl (fn);
1232 BrigDirectiveExecutable *e = emit_function_directives (f, true);
1233 emit_queued_operands ();
1235 delete f;
1237 return e;
1240 /* Enqueue all operands of INSN and return offset to BRIG data section
1241 to list of operand offsets. */
1243 static unsigned
1244 emit_insn_operands (hsa_insn_basic *insn)
1246 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1247 operand_offsets;
1249 unsigned l = insn->operand_count ();
1251 /* We have N operands so use 4 * N for the byte_count. */
1252 uint32_t byte_count = lendian32 (4 * l);
1253 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1254 if (l > 0)
1256 operand_offsets.safe_grow (l);
1257 for (unsigned i = 0; i < l; i++)
1258 operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i)));
1260 brig_data.add (operand_offsets.address (),
1261 l * sizeof (BrigOperandOffset32_t));
1263 brig_data.round_size_up (4);
1264 return offset;
1267 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
1268 to BRIG data section to list of operand offsets. */
1270 static unsigned
1271 emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL,
1272 hsa_op_base *op2 = NULL)
1274 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1275 operand_offsets;
1277 gcc_checking_assert (op0 != NULL);
1278 operand_offsets.safe_push (enqueue_op (op0));
1280 if (op1 != NULL)
1282 operand_offsets.safe_push (enqueue_op (op1));
1283 if (op2 != NULL)
1284 operand_offsets.safe_push (enqueue_op (op2));
1287 unsigned l = operand_offsets.length ();
1289 /* We have N operands so use 4 * N for the byte_count. */
1290 uint32_t byte_count = lendian32 (4 * l);
1292 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1293 brig_data.add (operand_offsets.address (),
1294 l * sizeof (BrigOperandOffset32_t));
1296 brig_data.round_size_up (4);
1298 return offset;
1301 /* Emit an HSA memory instruction and all necessary directives, schedule
1302 necessary operands for writing. */
1304 static void
1305 emit_memory_insn (hsa_insn_mem *mem)
1307 struct BrigInstMem repr;
1308 gcc_checking_assert (mem->operand_count () == 2);
1310 hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1));
1312 /* This is necessary because of the erroneous typedef of
1313 BrigMemoryModifier8_t which introduces padding which may then contain
1314 random stuff (which we do not want so that we can test things don't
1315 change). */
1316 memset (&repr, 0, sizeof (repr));
1317 repr.base.base.byteCount = lendian16 (sizeof (repr));
1318 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1319 repr.base.opcode = lendian16 (mem->m_opcode);
1320 repr.base.type = lendian16 (mem->m_type);
1321 repr.base.operands = lendian32 (emit_insn_operands (mem));
1323 if (addr->m_symbol)
1324 repr.segment = addr->m_symbol->m_segment;
1325 else
1326 repr.segment = BRIG_SEGMENT_FLAT;
1327 repr.modifier = 0;
1328 repr.equivClass = mem->m_equiv_class;
1329 repr.align = mem->m_align;
1330 if (mem->m_opcode == BRIG_OPCODE_LD)
1331 repr.width = BRIG_WIDTH_1;
1332 else
1333 repr.width = BRIG_WIDTH_NONE;
1334 memset (&repr.reserved, 0, sizeof (repr.reserved));
1335 brig_code.add (&repr, sizeof (repr));
1336 brig_insn_count++;
1339 /* Emit an HSA signal memory instruction and all necessary directives, schedule
1340 necessary operands for writing. */
1342 static void
1343 emit_signal_insn (hsa_insn_signal *mem)
1345 struct BrigInstSignal repr;
1347 memset (&repr, 0, sizeof (repr));
1348 repr.base.base.byteCount = lendian16 (sizeof (repr));
1349 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL);
1350 repr.base.opcode = lendian16 (mem->m_opcode);
1351 repr.base.type = lendian16 (mem->m_type);
1352 repr.base.operands = lendian32 (emit_insn_operands (mem));
1354 repr.memoryOrder = mem->m_memory_order;
1355 repr.signalOperation = mem->m_signalop;
1356 repr.signalType = hsa_machine_large_p () ? BRIG_TYPE_SIG64 : BRIG_TYPE_SIG32;
1358 brig_code.add (&repr, sizeof (repr));
1359 brig_insn_count++;
1362 /* Emit an HSA atomic memory instruction and all necessary directives, schedule
1363 necessary operands for writing. */
1365 static void
1366 emit_atomic_insn (hsa_insn_atomic *mem)
1368 struct BrigInstAtomic repr;
1370 /* Either operand[0] or operand[1] must be an address operand. */
1371 hsa_op_address *addr = NULL;
1372 if (is_a <hsa_op_address *> (mem->get_op (0)))
1373 addr = as_a <hsa_op_address *> (mem->get_op (0));
1374 else
1375 addr = as_a <hsa_op_address *> (mem->get_op (1));
1377 memset (&repr, 0, sizeof (repr));
1378 repr.base.base.byteCount = lendian16 (sizeof (repr));
1379 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC);
1380 repr.base.opcode = lendian16 (mem->m_opcode);
1381 repr.base.type = lendian16 (mem->m_type);
1382 repr.base.operands = lendian32 (emit_insn_operands (mem));
1384 if (addr->m_symbol)
1385 repr.segment = addr->m_symbol->m_segment;
1386 else
1387 repr.segment = BRIG_SEGMENT_FLAT;
1388 repr.memoryOrder = mem->m_memoryorder;
1389 repr.memoryScope = mem->m_memoryscope;
1390 repr.atomicOperation = mem->m_atomicop;
1392 brig_code.add (&repr, sizeof (repr));
1393 brig_insn_count++;
1396 /* Emit an HSA LDA instruction and all necessary directives, schedule
1397 necessary operands for writing. */
1399 static void
1400 emit_addr_insn (hsa_insn_basic *insn)
1402 struct BrigInstAddr repr;
1404 hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1));
1406 repr.base.base.byteCount = lendian16 (sizeof (repr));
1407 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR);
1408 repr.base.opcode = lendian16 (insn->m_opcode);
1409 repr.base.type = lendian16 (insn->m_type);
1410 repr.base.operands = lendian32 (emit_insn_operands (insn));
1412 if (addr->m_symbol)
1413 repr.segment = addr->m_symbol->m_segment;
1414 else
1415 repr.segment = BRIG_SEGMENT_FLAT;
1416 memset (&repr.reserved, 0, sizeof (repr.reserved));
1418 brig_code.add (&repr, sizeof (repr));
1419 brig_insn_count++;
1422 /* Emit an HSA segment conversion instruction and all necessary directives,
1423 schedule necessary operands for writing. */
1425 static void
1426 emit_segment_insn (hsa_insn_seg *seg)
1428 struct BrigInstSegCvt repr;
1430 repr.base.base.byteCount = lendian16 (sizeof (repr));
1431 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT);
1432 repr.base.opcode = lendian16 (seg->m_opcode);
1433 repr.base.type = lendian16 (seg->m_type);
1434 repr.base.operands = lendian32 (emit_insn_operands (seg));
1435 repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type);
1436 repr.segment = seg->m_segment;
1437 repr.modifier = 0;
1439 brig_code.add (&repr, sizeof (repr));
1441 brig_insn_count++;
1444 /* Emit an HSA alloca instruction and all necessary directives,
1445 schedule necessary operands for writing. */
1447 static void
1448 emit_alloca_insn (hsa_insn_alloca *alloca)
1450 struct BrigInstMem repr;
1451 gcc_checking_assert (alloca->operand_count () == 2);
1453 memset (&repr, 0, sizeof (repr));
1454 repr.base.base.byteCount = lendian16 (sizeof (repr));
1455 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1456 repr.base.opcode = lendian16 (alloca->m_opcode);
1457 repr.base.type = lendian16 (alloca->m_type);
1458 repr.base.operands = lendian32 (emit_insn_operands (alloca));
1459 repr.segment = BRIG_SEGMENT_PRIVATE;
1460 repr.modifier = 0;
1461 repr.equivClass = 0;
1462 repr.align = alloca->m_align;
1463 repr.width = BRIG_WIDTH_NONE;
1464 memset (&repr.reserved, 0, sizeof (repr.reserved));
1465 brig_code.add (&repr, sizeof (repr));
1466 brig_insn_count++;
1469 /* Emit an HSA comparison instruction and all necessary directives,
1470 schedule necessary operands for writing. */
1472 static void
1473 emit_cmp_insn (hsa_insn_cmp *cmp)
1475 struct BrigInstCmp repr;
1477 memset (&repr, 0, sizeof (repr));
1478 repr.base.base.byteCount = lendian16 (sizeof (repr));
1479 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP);
1480 repr.base.opcode = lendian16 (cmp->m_opcode);
1481 repr.base.type = lendian16 (cmp->m_type);
1482 repr.base.operands = lendian32 (emit_insn_operands (cmp));
1484 if (is_a <hsa_op_reg *> (cmp->get_op (1)))
1485 repr.sourceType
1486 = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type);
1487 else
1488 repr.sourceType
1489 = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type);
1490 repr.modifier = 0;
1491 repr.compare = cmp->m_compare;
1492 repr.pack = 0;
1494 brig_code.add (&repr, sizeof (repr));
1495 brig_insn_count++;
1498 /* Emit an HSA generic branching/sycnronization instruction. */
1500 static void
1501 emit_generic_branch_insn (hsa_insn_br *br)
1503 struct BrigInstBr repr;
1504 repr.base.base.byteCount = lendian16 (sizeof (repr));
1505 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1506 repr.base.opcode = lendian16 (br->m_opcode);
1507 repr.width = br->m_width;
1508 repr.base.type = lendian16 (br->m_type);
1509 repr.base.operands = lendian32 (emit_insn_operands (br));
1510 memset (&repr.reserved, 0, sizeof (repr.reserved));
1512 brig_code.add (&repr, sizeof (repr));
1513 brig_insn_count++;
1516 /* Emit an HSA conditional branching instruction and all necessary directives,
1517 schedule necessary operands for writing. */
1519 static void
1520 emit_cond_branch_insn (hsa_insn_cbr *br)
1522 struct BrigInstBr repr;
1524 basic_block target = NULL;
1525 edge_iterator ei;
1526 edge e;
1528 /* At the moment we only handle direct conditional jumps. */
1529 gcc_assert (br->m_opcode == BRIG_OPCODE_CBR);
1530 repr.base.base.byteCount = lendian16 (sizeof (repr));
1531 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1532 repr.base.opcode = lendian16 (br->m_opcode);
1533 repr.width = br->m_width;
1534 /* For Conditional jumps the type is always B1. */
1535 repr.base.type = lendian16 (BRIG_TYPE_B1);
1537 FOR_EACH_EDGE (e, ei, br->m_bb->succs)
1538 if (e->flags & EDGE_TRUE_VALUE)
1540 target = e->dest;
1541 break;
1543 gcc_assert (target);
1545 repr.base.operands
1546 = lendian32 (emit_operands (br->get_op (0),
1547 &hsa_bb_for_bb (target)->m_label_ref));
1548 memset (&repr.reserved, 0, sizeof (repr.reserved));
1550 brig_code.add (&repr, sizeof (repr));
1551 brig_insn_count++;
1554 /* Emit an HSA unconditional jump branching instruction that points to
1555 a label REFERENCE. */
1557 static void
1558 emit_unconditional_jump (hsa_op_code_ref *reference)
1560 struct BrigInstBr repr;
1562 repr.base.base.byteCount = lendian16 (sizeof (repr));
1563 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1564 repr.base.opcode = lendian16 (BRIG_OPCODE_BR);
1565 repr.base.type = lendian16 (BRIG_TYPE_NONE);
1566 /* Direct branches to labels must be width(all). */
1567 repr.width = BRIG_WIDTH_ALL;
1569 repr.base.operands = lendian32 (emit_operands (reference));
1570 memset (&repr.reserved, 0, sizeof (repr.reserved));
1571 brig_code.add (&repr, sizeof (repr));
1572 brig_insn_count++;
1575 /* Emit an HSA switch jump instruction that uses a jump table to
1576 jump to a destination label. */
1578 static void
1579 emit_switch_insn (hsa_insn_sbr *sbr)
1581 struct BrigInstBr repr;
1583 gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR);
1584 repr.base.base.byteCount = lendian16 (sizeof (repr));
1585 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1586 repr.base.opcode = lendian16 (sbr->m_opcode);
1587 repr.width = BRIG_WIDTH_1;
1588 /* For Conditional jumps the type is always B1. */
1589 hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0));
1590 repr.base.type = lendian16 (index->m_type);
1591 repr.base.operands
1592 = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list));
1593 memset (&repr.reserved, 0, sizeof (repr.reserved));
1595 brig_code.add (&repr, sizeof (repr));
1596 brig_insn_count++;
1599 /* Emit a HSA convert instruction and all necessary directives, schedule
1600 necessary operands for writing. */
1602 static void
1603 emit_cvt_insn (hsa_insn_cvt *insn)
1605 struct BrigInstCvt repr;
1606 BrigType16_t srctype;
1608 repr.base.base.byteCount = lendian16 (sizeof (repr));
1609 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT);
1610 repr.base.opcode = lendian16 (insn->m_opcode);
1611 repr.base.type = lendian16 (insn->m_type);
1612 repr.base.operands = lendian32 (emit_insn_operands (insn));
1614 if (is_a <hsa_op_reg *> (insn->get_op (1)))
1615 srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type;
1616 else
1617 srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type;
1618 repr.sourceType = lendian16 (srctype);
1619 repr.modifier = 0;
1620 /* float to smaller float requires a rounding setting (we default
1621 to 'near'. */
1622 if (hsa_type_float_p (insn->m_type)
1623 && (!hsa_type_float_p (srctype)
1624 || ((insn->m_type & BRIG_TYPE_BASE_MASK)
1625 < (srctype & BRIG_TYPE_BASE_MASK))))
1626 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1627 else if (hsa_type_integer_p (insn->m_type) &&
1628 hsa_type_float_p (srctype))
1629 repr.round = BRIG_ROUND_INTEGER_ZERO;
1630 else
1631 repr.round = BRIG_ROUND_NONE;
1632 brig_code.add (&repr, sizeof (repr));
1633 brig_insn_count++;
1636 /* Emit call instruction INSN, where this instruction must be closed
1637 within a call block instruction. */
1639 static void
1640 emit_call_insn (hsa_insn_call *call)
1642 struct BrigInstBr repr;
1644 repr.base.base.byteCount = lendian16 (sizeof (repr));
1645 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1646 repr.base.opcode = lendian16 (BRIG_OPCODE_CALL);
1647 repr.base.type = lendian16 (BRIG_TYPE_NONE);
1649 repr.base.operands
1650 = lendian32 (emit_operands (call->m_result_code_list, &call->m_func,
1651 call->m_args_code_list));
1653 /* Internal functions have not set m_called_function. */
1654 if (call->m_called_function)
1656 function_linkage_pair pair (call->m_called_function,
1657 call->m_func.m_brig_op_offset);
1658 function_call_linkage.safe_push (pair);
1660 else
1662 hsa_internal_fn *slot
1663 = hsa_emitted_internal_decls->find (call->m_called_internal_fn);
1664 gcc_assert (slot);
1665 gcc_assert (slot->m_offset > 0);
1666 call->m_func.m_directive_offset = slot->m_offset;
1669 repr.width = BRIG_WIDTH_ALL;
1670 memset (&repr.reserved, 0, sizeof (repr.reserved));
1672 brig_code.add (&repr, sizeof (repr));
1673 brig_insn_count++;
1676 /* Emit argument block directive. */
1678 static void
1679 emit_arg_block_insn (hsa_insn_arg_block *insn)
1681 switch (insn->m_kind)
1683 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
1685 struct BrigDirectiveArgBlock repr;
1686 repr.base.byteCount = lendian16 (sizeof (repr));
1687 repr.base.kind = lendian16 (insn->m_kind);
1688 brig_code.add (&repr, sizeof (repr));
1690 for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++)
1692 insn->m_call_insn->m_args_code_list->m_offsets[i]
1693 = lendian32 (emit_directive_variable
1694 (insn->m_call_insn->m_input_args[i]));
1695 brig_insn_count++;
1698 if (insn->m_call_insn->m_output_arg)
1700 insn->m_call_insn->m_result_code_list->m_offsets[0]
1701 = lendian32 (emit_directive_variable
1702 (insn->m_call_insn->m_output_arg));
1703 brig_insn_count++;
1706 break;
1708 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
1710 struct BrigDirectiveArgBlock repr;
1711 repr.base.byteCount = lendian16 (sizeof (repr));
1712 repr.base.kind = lendian16 (insn->m_kind);
1713 brig_code.add (&repr, sizeof (repr));
1714 break;
1716 default:
1717 gcc_unreachable ();
1720 brig_insn_count++;
1723 /* Emit comment directive. */
1725 static void
1726 emit_comment_insn (hsa_insn_comment *insn)
1728 struct BrigDirectiveComment repr;
1729 memset (&repr, 0, sizeof (repr));
1731 repr.base.byteCount = lendian16 (sizeof (repr));
1732 repr.base.kind = lendian16 (insn->m_opcode);
1733 repr.name = brig_emit_string (insn->m_comment, '\0', false);
1734 brig_code.add (&repr, sizeof (repr));
1737 /* Emit queue instruction INSN. */
1739 static void
1740 emit_queue_insn (hsa_insn_queue *insn)
1742 BrigInstQueue repr;
1743 memset (&repr, 0, sizeof (repr));
1745 repr.base.base.byteCount = lendian16 (sizeof (repr));
1746 repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE);
1747 repr.base.opcode = lendian16 (insn->m_opcode);
1748 repr.base.type = lendian16 (insn->m_type);
1749 repr.segment = insn->m_segment;
1750 repr.memoryOrder = insn->m_memory_order;
1751 repr.base.operands = lendian32 (emit_insn_operands (insn));
1752 brig_data.round_size_up (4);
1753 brig_code.add (&repr, sizeof (repr));
1755 brig_insn_count++;
1758 /* Emit source type instruction INSN. */
1760 static void
1761 emit_srctype_insn (hsa_insn_srctype *insn)
1763 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1764 struct BrigInstSourceType repr;
1765 unsigned operand_count = insn->operand_count ();
1766 gcc_checking_assert (operand_count >= 2);
1768 memset (&repr, 0, sizeof (repr));
1769 repr.sourceType = lendian16 (insn->m_source_type);
1770 repr.base.base.byteCount = lendian16 (sizeof (repr));
1771 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1772 repr.base.opcode = lendian16 (insn->m_opcode);
1773 repr.base.type = lendian16 (insn->m_type);
1775 repr.base.operands = lendian32 (emit_insn_operands (insn));
1776 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1777 brig_insn_count++;
1780 /* Emit packed instruction INSN. */
1782 static void
1783 emit_packed_insn (hsa_insn_packed *insn)
1785 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1786 struct BrigInstSourceType repr;
1787 unsigned operand_count = insn->operand_count ();
1788 gcc_checking_assert (operand_count >= 2);
1790 memset (&repr, 0, sizeof (repr));
1791 repr.sourceType = lendian16 (insn->m_source_type);
1792 repr.base.base.byteCount = lendian16 (sizeof (repr));
1793 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1794 repr.base.opcode = lendian16 (insn->m_opcode);
1795 repr.base.type = lendian16 (insn->m_type);
1797 if (insn->m_opcode == BRIG_OPCODE_COMBINE)
1799 /* Create operand list for packed type. */
1800 for (unsigned i = 1; i < operand_count; i++)
1802 gcc_checking_assert (insn->get_op (i));
1803 insn->m_operand_list->m_offsets[i - 1]
1804 = lendian32 (enqueue_op (insn->get_op (i)));
1807 repr.base.operands = lendian32 (emit_operands (insn->get_op (0),
1808 insn->m_operand_list));
1810 else if (insn->m_opcode == BRIG_OPCODE_EXPAND)
1812 /* Create operand list for packed type. */
1813 for (unsigned i = 0; i < operand_count - 1; i++)
1815 gcc_checking_assert (insn->get_op (i));
1816 insn->m_operand_list->m_offsets[i]
1817 = lendian32 (enqueue_op (insn->get_op (i)));
1820 unsigned ops = emit_operands (insn->m_operand_list,
1821 insn->get_op (insn->operand_count () - 1));
1822 repr.base.operands = lendian32 (ops);
1826 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1827 brig_insn_count++;
1830 /* Emit a basic HSA instruction and all necessary directives, schedule
1831 necessary operands for writing. */
1833 static void
1834 emit_basic_insn (hsa_insn_basic *insn)
1836 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1837 struct BrigInstMod repr;
1838 BrigType16_t type;
1840 memset (&repr, 0, sizeof (repr));
1841 repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic));
1842 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC);
1843 repr.base.opcode = lendian16 (insn->m_opcode);
1844 switch (insn->m_opcode)
1846 /* And the bit-logical operations need bit types and whine about
1847 arithmetic types :-/ */
1848 case BRIG_OPCODE_AND:
1849 case BRIG_OPCODE_OR:
1850 case BRIG_OPCODE_XOR:
1851 case BRIG_OPCODE_NOT:
1852 type = regtype_for_type (insn->m_type);
1853 break;
1854 default:
1855 type = insn->m_type;
1856 break;
1858 repr.base.type = lendian16 (type);
1859 repr.base.operands = lendian32 (emit_insn_operands (insn));
1861 if (hsa_type_packed_p (type))
1863 if (hsa_type_float_p (type)
1864 && !hsa_opcode_floating_bit_insn_p (insn->m_opcode))
1865 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1866 else
1867 repr.round = 0;
1868 /* We assume that destination and sources agree in packing layout. */
1869 if (insn->num_used_ops () >= 2)
1870 repr.pack = BRIG_PACK_PP;
1871 else
1872 repr.pack = BRIG_PACK_P;
1873 repr.reserved = 0;
1874 repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod));
1875 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD);
1876 brig_code.add (&repr, sizeof (struct BrigInstMod));
1878 else
1879 brig_code.add (&repr, sizeof (struct BrigInstBasic));
1880 brig_insn_count++;
1883 /* Emit an HSA instruction and all necessary directives, schedule necessary
1884 operands for writing. */
1886 static void
1887 emit_insn (hsa_insn_basic *insn)
1889 gcc_assert (!is_a <hsa_insn_phi *> (insn));
1891 insn->m_brig_offset = brig_code.total_size;
1893 if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn))
1894 emit_signal_insn (signal);
1895 else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn))
1896 emit_atomic_insn (atom);
1897 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
1898 emit_memory_insn (mem);
1899 else if (insn->m_opcode == BRIG_OPCODE_LDA)
1900 emit_addr_insn (insn);
1901 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
1902 emit_segment_insn (seg);
1903 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
1904 emit_cmp_insn (cmp);
1905 else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn))
1906 emit_cond_branch_insn (br);
1907 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
1909 if (switch_instructions == NULL)
1910 switch_instructions = new vec <hsa_insn_sbr *> ();
1912 switch_instructions->safe_push (sbr);
1913 emit_switch_insn (sbr);
1915 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
1916 emit_generic_branch_insn (br);
1917 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
1918 emit_arg_block_insn (block);
1919 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
1920 emit_call_insn (call);
1921 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
1922 emit_comment_insn (comment);
1923 else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn))
1924 emit_queue_insn (queue);
1925 else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn))
1926 emit_srctype_insn (srctype);
1927 else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn))
1928 emit_packed_insn (packed);
1929 else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn))
1930 emit_cvt_insn (cvt);
1931 else if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn))
1932 emit_alloca_insn (alloca);
1933 else
1934 emit_basic_insn (insn);
1937 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
1938 or we are about to finish emitting code, if it is NULL. If the fall through
1939 edge from BB does not lead to NEXT_BB, emit an unconditional jump. */
1941 static void
1942 perhaps_emit_branch (basic_block bb, basic_block next_bb)
1944 basic_block t_bb = NULL, ff = NULL;
1946 edge_iterator ei;
1947 edge e;
1949 /* If the last instruction of BB is a switch, ignore emission of all
1950 edges. */
1951 if (hsa_bb_for_bb (bb)->m_last_insn
1952 && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn))
1953 return;
1955 FOR_EACH_EDGE (e, ei, bb->succs)
1956 if (e->flags & EDGE_TRUE_VALUE)
1958 gcc_assert (!t_bb);
1959 t_bb = e->dest;
1961 else
1963 gcc_assert (!ff);
1964 ff = e->dest;
1967 if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun))
1968 return;
1970 emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref);
1973 /* Emit the a function with name NAME to the various brig sections. */
1975 void
1976 hsa_brig_emit_function (void)
1978 basic_block bb, prev_bb;
1979 hsa_insn_basic *insn;
1980 BrigDirectiveExecutable *ptr_to_fndir;
1982 brig_init ();
1984 brig_insn_count = 0;
1985 memset (&op_queue, 0, sizeof (op_queue));
1986 op_queue.projected_size = brig_operand.total_size;
1988 if (!function_offsets)
1989 function_offsets = new hash_map<tree, BrigCodeOffset32_t> ();
1991 if (!emitted_declarations)
1992 emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> ();
1994 for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++)
1996 tree called = hsa_cfun->m_called_functions[i];
1998 /* If the function has no definition, emit a declaration. */
1999 if (!emitted_declarations->get (called))
2001 BrigDirectiveExecutable *e = emit_function_declaration (called);
2002 emitted_declarations->put (called, e);
2006 for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++)
2008 hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i];
2009 emit_internal_fn_decl (called);
2012 ptr_to_fndir = emit_function_directives (hsa_cfun, false);
2013 for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn;
2014 insn;
2015 insn = insn->m_next)
2016 emit_insn (insn);
2017 prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2018 FOR_EACH_BB_FN (bb, cfun)
2020 perhaps_emit_branch (prev_bb, bb);
2021 emit_bb_label_directive (hsa_bb_for_bb (bb));
2022 for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next)
2023 emit_insn (insn);
2024 prev_bb = bb;
2026 perhaps_emit_branch (prev_bb, NULL);
2027 ptr_to_fndir->nextModuleEntry = lendian32 (brig_code.total_size);
2029 /* Fill up label references for all sbr instructions. */
2030 if (switch_instructions)
2032 for (unsigned i = 0; i < switch_instructions->length (); i++)
2034 hsa_insn_sbr *sbr = (*switch_instructions)[i];
2035 for (unsigned j = 0; j < sbr->m_jump_table.length (); j++)
2037 hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]);
2038 sbr->m_label_code_list->m_offsets[j]
2039 = hbb->m_label_ref.m_directive_offset;
2043 switch_instructions->release ();
2044 delete switch_instructions;
2045 switch_instructions = NULL;
2048 if (dump_file)
2050 fprintf (dump_file, "------- After BRIG emission: -------\n");
2051 dump_hsa_cfun (dump_file);
2054 emit_queued_operands ();
2057 /* Emit all OMP symbols related to OMP. */
2059 void
2060 hsa_brig_emit_omp_symbols (void)
2062 brig_init ();
2063 emit_directive_variable (hsa_num_threads);
2066 /* Create and return __hsa_global_variables symbol that contains
2067 all informations consumed by libgomp to link global variables
2068 with their string names used by an HSA kernel. */
2070 static tree
2071 hsa_output_global_variables ()
2073 unsigned l = hsa_global_variable_symbols->elements ();
2075 tree variable_info_type = make_node (RECORD_TYPE);
2076 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2077 get_identifier ("name"), ptr_type_node);
2078 DECL_CHAIN (id_f1) = NULL_TREE;
2079 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2080 get_identifier ("omp_data_size"),
2081 ptr_type_node);
2082 DECL_CHAIN (id_f2) = id_f1;
2083 finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2,
2084 NULL_TREE);
2086 tree int_num_of_global_vars;
2087 int_num_of_global_vars = build_int_cst (uint32_type_node, l);
2088 tree global_vars_num_index_type = build_index_type (int_num_of_global_vars);
2089 tree global_vars_array_type = build_array_type (variable_info_type,
2090 global_vars_num_index_type);
2091 TYPE_ARTIFICIAL (global_vars_array_type) = 1;
2093 vec<constructor_elt, va_gc> *global_vars_vec = NULL;
2095 for (hash_table <hsa_noop_symbol_hasher>::iterator it
2096 = hsa_global_variable_symbols->begin ();
2097 it != hsa_global_variable_symbols->end (); ++it)
2099 unsigned len = strlen ((*it)->m_name);
2100 char *copy = XNEWVEC (char, len + 2);
2101 copy[0] = '&';
2102 memcpy (copy + 1, (*it)->m_name, len);
2103 copy[len + 1] = '\0';
2104 len++;
2105 hsa_sanitize_name (copy);
2107 tree var_name = build_string (len, copy);
2108 TREE_TYPE (var_name)
2109 = build_array_type (char_type_node, build_index_type (size_int (len)));
2110 free (copy);
2112 vec<constructor_elt, va_gc> *variable_info_vec = NULL;
2113 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2114 build1 (ADDR_EXPR,
2115 build_pointer_type (TREE_TYPE (var_name)),
2116 var_name));
2117 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2118 build_fold_addr_expr ((*it)->m_decl));
2120 tree variable_info_ctor = build_constructor (variable_info_type,
2121 variable_info_vec);
2123 CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE,
2124 variable_info_ctor);
2127 tree global_vars_ctor = build_constructor (global_vars_array_type,
2128 global_vars_vec);
2130 char tmp_name[64];
2131 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1);
2132 tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2133 get_identifier (tmp_name),
2134 global_vars_array_type);
2135 TREE_STATIC (global_vars_table) = 1;
2136 TREE_READONLY (global_vars_table) = 1;
2137 TREE_PUBLIC (global_vars_table) = 0;
2138 DECL_ARTIFICIAL (global_vars_table) = 1;
2139 DECL_IGNORED_P (global_vars_table) = 1;
2140 DECL_EXTERNAL (global_vars_table) = 0;
2141 TREE_CONSTANT (global_vars_table) = 1;
2142 DECL_INITIAL (global_vars_table) = global_vars_ctor;
2143 varpool_node::finalize_decl (global_vars_table);
2145 return global_vars_table;
2148 /* Create __hsa_host_functions and __hsa_kernels that contain
2149 all informations consumed by libgomp to register all kernels
2150 in the BRIG binary. */
2152 static void
2153 hsa_output_kernels (tree *host_func_table, tree *kernels)
2155 unsigned map_count = hsa_get_number_decl_kernel_mappings ();
2157 tree int_num_of_kernels;
2158 int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
2159 tree kernel_num_index_type = build_index_type (int_num_of_kernels);
2160 tree host_functions_array_type = build_array_type (ptr_type_node,
2161 kernel_num_index_type);
2162 TYPE_ARTIFICIAL (host_functions_array_type) = 1;
2164 vec<constructor_elt, va_gc> *host_functions_vec = NULL;
2165 for (unsigned i = 0; i < map_count; ++i)
2167 tree decl = hsa_get_decl_kernel_mapping_decl (i);
2168 tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl));
2169 CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
2171 tree host_functions_ctor = build_constructor (host_functions_array_type,
2172 host_functions_vec);
2173 char tmp_name[64];
2174 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
2175 tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2176 get_identifier (tmp_name),
2177 host_functions_array_type);
2178 TREE_STATIC (hsa_host_func_table) = 1;
2179 TREE_READONLY (hsa_host_func_table) = 1;
2180 TREE_PUBLIC (hsa_host_func_table) = 0;
2181 DECL_ARTIFICIAL (hsa_host_func_table) = 1;
2182 DECL_IGNORED_P (hsa_host_func_table) = 1;
2183 DECL_EXTERNAL (hsa_host_func_table) = 0;
2184 TREE_CONSTANT (hsa_host_func_table) = 1;
2185 DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
2186 varpool_node::finalize_decl (hsa_host_func_table);
2187 *host_func_table = hsa_host_func_table;
2189 /* Following code emits list of kernel_info structures. */
2191 tree kernel_info_type = make_node (RECORD_TYPE);
2192 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2193 get_identifier ("name"), ptr_type_node);
2194 DECL_CHAIN (id_f1) = NULL_TREE;
2195 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2196 get_identifier ("omp_data_size"),
2197 unsigned_type_node);
2198 DECL_CHAIN (id_f2) = id_f1;
2199 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2200 get_identifier ("gridified_kernel_p"),
2201 boolean_type_node);
2202 DECL_CHAIN (id_f3) = id_f2;
2203 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2204 get_identifier ("kernel_dependencies_count"),
2205 unsigned_type_node);
2206 DECL_CHAIN (id_f4) = id_f3;
2207 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2208 get_identifier ("kernel_dependencies"),
2209 build_pointer_type (build_pointer_type
2210 (char_type_node)));
2211 DECL_CHAIN (id_f5) = id_f4;
2212 finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5,
2213 NULL_TREE);
2215 int_num_of_kernels = build_int_cstu (uint32_type_node, map_count);
2216 tree kernel_info_vector_type
2217 = build_array_type (kernel_info_type,
2218 build_index_type (int_num_of_kernels));
2219 TYPE_ARTIFICIAL (kernel_info_vector_type) = 1;
2221 vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL;
2222 tree kernel_dependencies_vector_type = NULL;
2224 for (unsigned i = 0; i < map_count; ++i)
2226 tree kernel = hsa_get_decl_kernel_mapping_decl (i);
2227 char *name = hsa_get_decl_kernel_mapping_name (i);
2228 unsigned len = strlen (name);
2229 char *copy = XNEWVEC (char, len + 2);
2230 copy[0] = '&';
2231 memcpy (copy + 1, name, len);
2232 copy[len + 1] = '\0';
2233 len++;
2235 tree kern_name = build_string (len, copy);
2236 TREE_TYPE (kern_name)
2237 = build_array_type (char_type_node, build_index_type (size_int (len)));
2238 free (copy);
2240 unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i);
2241 tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size);
2242 bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i);
2243 tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
2244 gridified_kernel_p);
2245 unsigned count = 0;
2246 vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
2247 if (hsa_decl_kernel_dependencies)
2249 vec<const char *> **slot;
2250 slot = hsa_decl_kernel_dependencies->get (kernel);
2251 if (slot)
2253 vec <const char *> *dependencies = *slot;
2254 count = dependencies->length ();
2256 kernel_dependencies_vector_type
2257 = build_array_type (build_pointer_type (char_type_node),
2258 build_index_type (size_int (count)));
2259 TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1;
2261 for (unsigned j = 0; j < count; j++)
2263 const char *d = (*dependencies)[j];
2264 len = strlen (d);
2265 tree dependency_name = build_string (len, d);
2266 TREE_TYPE (dependency_name)
2267 = build_array_type (char_type_node,
2268 build_index_type (size_int (len)));
2270 CONSTRUCTOR_APPEND_ELT
2271 (kernel_dependencies_vec, NULL_TREE,
2272 build1 (ADDR_EXPR,
2273 build_pointer_type (TREE_TYPE (dependency_name)),
2274 dependency_name));
2279 tree dependencies_count = build_int_cstu (unsigned_type_node, count);
2281 vec<constructor_elt, va_gc> *kernel_info_vec = NULL;
2282 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2283 build1 (ADDR_EXPR,
2284 build_pointer_type (TREE_TYPE
2285 (kern_name)),
2286 kern_name));
2287 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size);
2288 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2289 gridified_kernel_p_tree);
2290 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count);
2292 if (count > 0)
2294 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
2295 gcc_checking_assert (kernel_dependencies_vector_type);
2296 tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2297 get_identifier (tmp_name),
2298 kernel_dependencies_vector_type);
2300 TREE_STATIC (dependencies_list) = 1;
2301 TREE_READONLY (dependencies_list) = 1;
2302 TREE_PUBLIC (dependencies_list) = 0;
2303 DECL_ARTIFICIAL (dependencies_list) = 1;
2304 DECL_IGNORED_P (dependencies_list) = 1;
2305 DECL_EXTERNAL (dependencies_list) = 0;
2306 TREE_CONSTANT (dependencies_list) = 1;
2307 DECL_INITIAL (dependencies_list)
2308 = build_constructor (kernel_dependencies_vector_type,
2309 kernel_dependencies_vec);
2310 varpool_node::finalize_decl (dependencies_list);
2312 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2313 build1 (ADDR_EXPR,
2314 build_pointer_type
2315 (TREE_TYPE (dependencies_list)),
2316 dependencies_list));
2318 else
2319 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node);
2321 tree kernel_info_ctor = build_constructor (kernel_info_type,
2322 kernel_info_vec);
2324 CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE,
2325 kernel_info_ctor);
2328 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1);
2329 tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2330 get_identifier (tmp_name),
2331 kernel_info_vector_type);
2333 TREE_STATIC (hsa_kernels) = 1;
2334 TREE_READONLY (hsa_kernels) = 1;
2335 TREE_PUBLIC (hsa_kernels) = 0;
2336 DECL_ARTIFICIAL (hsa_kernels) = 1;
2337 DECL_IGNORED_P (hsa_kernels) = 1;
2338 DECL_EXTERNAL (hsa_kernels) = 0;
2339 TREE_CONSTANT (hsa_kernels) = 1;
2340 DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type,
2341 kernel_info_vector_vec);
2342 varpool_node::finalize_decl (hsa_kernels);
2343 *kernels = hsa_kernels;
2346 /* Create a static constructor that will register out brig stuff with
2347 libgomp. */
2349 static void
2350 hsa_output_libgomp_mapping (tree brig_decl)
2352 unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
2353 unsigned global_variable_count = hsa_global_variable_symbols->elements ();
2355 tree kernels;
2356 tree host_func_table;
2358 hsa_output_kernels (&host_func_table, &kernels);
2359 tree global_vars = hsa_output_global_variables ();
2361 tree hsa_image_desc_type = make_node (RECORD_TYPE);
2362 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2363 get_identifier ("brig_module"), ptr_type_node);
2364 DECL_CHAIN (id_f1) = NULL_TREE;
2365 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2366 get_identifier ("kernel_count"),
2367 unsigned_type_node);
2369 DECL_CHAIN (id_f2) = id_f1;
2370 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2371 get_identifier ("hsa_kernel_infos"),
2372 ptr_type_node);
2373 DECL_CHAIN (id_f3) = id_f2;
2374 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2375 get_identifier ("global_variable_count"),
2376 unsigned_type_node);
2377 DECL_CHAIN (id_f4) = id_f3;
2378 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2379 get_identifier ("hsa_global_variable_infos"),
2380 ptr_type_node);
2381 DECL_CHAIN (id_f5) = id_f4;
2382 finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5,
2383 NULL_TREE);
2384 TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;
2386 vec<constructor_elt, va_gc> *img_desc_vec = NULL;
2387 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2388 build_fold_addr_expr (brig_decl));
2389 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2390 build_int_cstu (unsigned_type_node, kernel_count));
2391 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2392 build1 (ADDR_EXPR,
2393 build_pointer_type (TREE_TYPE (kernels)),
2394 kernels));
2395 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2396 build_int_cstu (unsigned_type_node,
2397 global_variable_count));
2398 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2399 build1 (ADDR_EXPR,
2400 build_pointer_type (TREE_TYPE (global_vars)),
2401 global_vars));
2403 tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
2405 char tmp_name[64];
2406 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1);
2407 tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2408 get_identifier (tmp_name),
2409 hsa_image_desc_type);
2410 TREE_STATIC (hsa_img_descriptor) = 1;
2411 TREE_READONLY (hsa_img_descriptor) = 1;
2412 TREE_PUBLIC (hsa_img_descriptor) = 0;
2413 DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
2414 DECL_IGNORED_P (hsa_img_descriptor) = 1;
2415 DECL_EXTERNAL (hsa_img_descriptor) = 0;
2416 TREE_CONSTANT (hsa_img_descriptor) = 1;
2417 DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
2418 varpool_node::finalize_decl (hsa_img_descriptor);
2420 /* Construct the "host_table" libgomp expects. */
2421 tree index_type = build_index_type (build_int_cst (integer_type_node, 4));
2422 tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type);
2423 TYPE_ARTIFICIAL (libgomp_host_table_type) = 1;
2424 vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
2425 tree host_func_table_addr = build_fold_addr_expr (host_func_table);
2426 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2427 host_func_table_addr);
2428 offset_int func_table_size
2429 = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
2430 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2431 fold_build2 (POINTER_PLUS_EXPR,
2432 TREE_TYPE (host_func_table_addr),
2433 host_func_table_addr,
2434 build_int_cst (size_type_node,
2435 func_table_size.to_uhwi
2436 ())));
2437 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2438 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2439 tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
2440 libgomp_host_table_vec);
2441 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1);
2442 tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2443 get_identifier (tmp_name),
2444 libgomp_host_table_type);
2446 TREE_STATIC (hsa_libgomp_host_table) = 1;
2447 TREE_READONLY (hsa_libgomp_host_table) = 1;
2448 TREE_PUBLIC (hsa_libgomp_host_table) = 0;
2449 DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
2450 DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
2451 DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
2452 TREE_CONSTANT (hsa_libgomp_host_table) = 1;
2453 DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
2454 varpool_node::finalize_decl (hsa_libgomp_host_table);
2456 /* Generate an initializer with a call to the registration routine. */
2458 tree offload_register
2459 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER);
2460 gcc_checking_assert (offload_register);
2462 tree *hsa_ctor_stmts = hsa_get_ctor_statements ();
2463 append_to_statement_list
2464 (build_call_expr (offload_register, 4,
2465 build_int_cstu (unsigned_type_node,
2466 GOMP_VERSION_PACK (GOMP_VERSION,
2467 GOMP_VERSION_HSA)),
2468 build_fold_addr_expr (hsa_libgomp_host_table),
2469 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2470 build_fold_addr_expr (hsa_img_descriptor)),
2471 hsa_ctor_stmts);
2473 cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY);
2475 tree offload_unregister
2476 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER);
2477 gcc_checking_assert (offload_unregister);
2479 tree *hsa_dtor_stmts = hsa_get_dtor_statements ();
2480 append_to_statement_list
2481 (build_call_expr (offload_unregister, 4,
2482 build_int_cstu (unsigned_type_node,
2483 GOMP_VERSION_PACK (GOMP_VERSION,
2484 GOMP_VERSION_HSA)),
2485 build_fold_addr_expr (hsa_libgomp_host_table),
2486 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2487 build_fold_addr_expr (hsa_img_descriptor)),
2488 hsa_dtor_stmts);
2489 cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, DEFAULT_INIT_PRIORITY);
2492 /* Emit the brig module we have compiled to a section in the final assembly and
2493 also create a compile unit static constructor that will register the brig
2494 module with libgomp. */
2496 void
2497 hsa_output_brig (void)
2499 section *saved_section;
2501 if (!brig_initialized)
2502 return;
2504 for (unsigned i = 0; i < function_call_linkage.length (); i++)
2506 function_linkage_pair p = function_call_linkage[i];
2508 BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl);
2509 gcc_assert (*func_offset);
2510 BrigOperandCodeRef *code_ref
2511 = (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset));
2512 gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF);
2513 code_ref->ref = lendian32 (*func_offset);
2516 /* Iterate all function declarations and if we meet a function that should
2517 have module linkage and we are unable to emit HSAIL for the function,
2518 then change the linkage to program linkage. Doing so, we will emit
2519 a valid BRIG image. */
2520 if (hsa_failed_functions != NULL && emitted_declarations != NULL)
2521 for (hash_map <tree, BrigDirectiveExecutable *>::iterator it
2522 = emitted_declarations->begin ();
2523 it != emitted_declarations->end ();
2524 ++it)
2526 if (hsa_failed_functions->contains ((*it).first))
2527 (*it).second->linkage = BRIG_LINKAGE_PROGRAM;
2530 saved_section = in_section;
2532 switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
2533 char tmp_name[64];
2534 ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
2535 ASM_OUTPUT_LABEL (asm_out_file, tmp_name);
2536 tree brig_id = get_identifier (tmp_name);
2537 tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
2538 char_type_node);
2539 SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
2540 TREE_ADDRESSABLE (brig_decl) = 1;
2541 TREE_READONLY (brig_decl) = 1;
2542 DECL_ARTIFICIAL (brig_decl) = 1;
2543 DECL_IGNORED_P (brig_decl) = 1;
2544 TREE_STATIC (brig_decl) = 1;
2545 TREE_PUBLIC (brig_decl) = 0;
2546 TREE_USED (brig_decl) = 1;
2547 DECL_INITIAL (brig_decl) = brig_decl;
2548 TREE_ASM_WRITTEN (brig_decl) = 1;
2550 BrigModuleHeader module_header;
2551 memcpy (&module_header.identification, "HSA BRIG",
2552 sizeof (module_header.identification));
2553 module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR);
2554 module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR);
2555 uint64_t section_index[3];
2557 int data_padding, code_padding, operand_padding;
2558 data_padding = HSA_SECTION_ALIGNMENT
2559 - brig_data.total_size % HSA_SECTION_ALIGNMENT;
2560 code_padding = HSA_SECTION_ALIGNMENT
2561 - brig_code.total_size % HSA_SECTION_ALIGNMENT;
2562 operand_padding = HSA_SECTION_ALIGNMENT
2563 - brig_operand.total_size % HSA_SECTION_ALIGNMENT;
2565 uint64_t module_size = sizeof (module_header)
2566 + sizeof (section_index)
2567 + brig_data.total_size
2568 + data_padding
2569 + brig_code.total_size
2570 + code_padding
2571 + brig_operand.total_size
2572 + operand_padding;
2573 gcc_assert ((module_size % 16) == 0);
2574 module_header.byteCount = lendian64 (module_size);
2575 memset (&module_header.hash, 0, sizeof (module_header.hash));
2576 module_header.reserved = 0;
2577 module_header.sectionCount = lendian32 (3);
2578 module_header.sectionIndex = lendian64 (sizeof (module_header));
2579 assemble_string ((const char *) &module_header, sizeof (module_header));
2580 uint64_t off = sizeof (module_header) + sizeof (section_index);
2581 section_index[0] = lendian64 (off);
2582 off += brig_data.total_size + data_padding;
2583 section_index[1] = lendian64 (off);
2584 off += brig_code.total_size + code_padding;
2585 section_index[2] = lendian64 (off);
2586 assemble_string ((const char *) &section_index, sizeof (section_index));
2588 char padding[HSA_SECTION_ALIGNMENT];
2589 memset (padding, 0, sizeof (padding));
2591 brig_data.output ();
2592 assemble_string (padding, data_padding);
2593 brig_code.output ();
2594 assemble_string (padding, code_padding);
2595 brig_operand.output ();
2596 assemble_string (padding, operand_padding);
2598 if (saved_section)
2599 switch_to_section (saved_section);
2601 hsa_output_libgomp_mapping (brig_decl);
2603 hsa_free_decl_kernel_mapping ();
2604 brig_release_data ();
2605 hsa_deinit_compilation_unit_data ();
2607 delete emitted_declarations;
2608 emitted_declarations = NULL;
2609 delete function_offsets;
2610 function_offsets = NULL;