* gcc.dg/torture/pr78515.c: Add -Wno-psabi for AIX.
[official-gcc.git] / gcc / hsa-brig.c
blobacd91647cc62f8e4893bee3f2c41b00d1b7c219a
/* Producing binary form of HSA BRIG from our internal representation.
   Copyright (C) 2013-2016 Free Software Foundation, Inc.
   Contributed by Martin Jambor <mjambor@suse.cz> and
   Martin Liska <mliska@suse.cz>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "target.h"
27 #include "memmodel.h"
28 #include "tm_p.h"
29 #include "is-a.h"
30 #include "vec.h"
31 #include "hash-table.h"
32 #include "hash-map.h"
33 #include "tree.h"
34 #include "tree-iterator.h"
35 #include "stor-layout.h"
36 #include "output.h"
37 #include "cfg.h"
38 #include "function.h"
39 #include "fold-const.h"
40 #include "stringpool.h"
41 #include "gimple-pretty-print.h"
42 #include "diagnostic-core.h"
43 #include "cgraph.h"
44 #include "dumpfile.h"
45 #include "print-tree.h"
46 #include "symbol-summary.h"
47 #include "hsa.h"
48 #include "gomp-constants.h"
/* Convert the 16-bit value VAL to little endian form, if necessary.

   When the host compiler is GCC 4.8 or newer, the host byte order is read
   from __BYTE_ORDER__: little endian and PDP endian hosts return VAL
   unchanged, big endian hosts swap the bytes with __builtin_bswap16.
   Otherwise the decision is based on WORDS_BIGENDIAN and the swap is open
   coded with shifts.  */

static uint16_t
lendian16 (uint16_t val)
{
#if GCC_VERSION >= 4008
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap16 (val);
#else   /* __ORDER_PDP_ENDIAN__ */
  return val;
#endif
#else
// provide a safe slower default, with shifts and masking
#ifndef WORDS_BIGENDIAN
  return val;
#else
  /* The operands are promoted to int, so truncate the result explicitly.  */
  return (uint16_t) ((val >> 8) | (val << 8));
#endif
#endif
}
/* Convert the 32-bit value VAL to little endian form, if necessary.

   When the host compiler is GCC 4.6 or newer, the host byte order is read
   from __BYTE_ORDER__: little endian hosts return VAL unchanged, big endian
   hosts use __builtin_bswap32 and PDP endian hosts exchange the two 16-bit
   halves.  Otherwise the decision is based on WORDS_BIGENDIAN and the swap
   is open coded with shifts and masks.  */

static uint32_t
lendian32 (uint32_t val)
{
#if GCC_VERSION >= 4006
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap32 (val);
#else   /* __ORDER_PDP_ENDIAN__ */
  return (val >> 16) | (val << 16);
#endif
#else
// provide a safe slower default, with shifts and masking
#ifndef WORDS_BIGENDIAN
  return val;
#else
  /* First swap the bytes inside each half, then swap the halves.  */
  val = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8);
  return (val >> 16) | (val << 16);
#endif
#endif
}
/* Convert the 64-bit value VAL to little endian form, if necessary.

   When the host compiler is GCC 4.6 or newer, the host byte order is read
   from __BYTE_ORDER__: little endian hosts return VAL unchanged, big endian
   hosts use __builtin_bswap64 and PDP endian hosts reverse the order of the
   four 16-bit words.  Otherwise the decision is based on WORDS_BIGENDIAN and
   the swap is open coded with shifts and masks.  */

static uint64_t
lendian64 (uint64_t val)
{
#if GCC_VERSION >= 4006
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return val;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return __builtin_bswap64 (val);
#else   /* __ORDER_PDP_ENDIAN__ */
  return (((val & 0xffffull) << 48)
          | ((val & 0xffff0000ull) << 16)
          | ((val & 0xffff00000000ull) >> 16)
          | ((val & 0xffff000000000000ull) >> 48));
#endif
#else
// provide a safe slower default, with shifts and masking
#ifndef WORDS_BIGENDIAN
  return val;
#else
  /* Swap bytes within 16-bit words, then words within 32-bit halves, then
     the two halves.  */
  val = (((val & 0xff00ff00ff00ff00ull) >> 8)
         | ((val & 0x00ff00ff00ff00ffull) << 8));
  val = (((val & 0xffff0000ffff0000ull) >> 16)
         | ((val & 0x0000ffff0000ffffull) << 16));
  return (val >> 32) | (val << 32);
#endif
#endif
}
/* String constants naming the BRIG container and its parts.  The three
   BRIG_SECTION_*_NAME strings are the names given to the data, code and
   operand sections when they are initialized in brig_init.  */

#define BRIG_ELF_SECTION_NAME ".brig"
#define BRIG_LABEL_STRING "hsa_brig"
#define BRIG_SECTION_DATA_NAME "hsa_data"
#define BRIG_SECTION_CODE_NAME "hsa_code"
#define BRIG_SECTION_OPERAND_NAME "hsa_operand"

/* Number of bytes allocated for every chunk of section data.  */

#define BRIG_CHUNK_MAX_SIZE (64 * 1024)

/* Required HSA section alignment.  */

#define HSA_SECTION_ALIGNMENT 16
/* One chunk of BRIG binary data.  A section keeps a vector of these and
   fills them one after another.  */

struct hsa_brig_data_chunk
{
  /* Size of the data already stored into a chunk.  */
  unsigned size;

  /* Pointer to the data.  */
  char *data;
};
150 /* Structure representing a BRIG section, holding and writing its data. */
152 class hsa_brig_section
154 public:
155 /* Section name that will be output to the BRIG. */
156 const char *section_name;
157 /* Size in bytes of all data stored in the section. */
158 unsigned total_size;
159 /* The size of the header of the section including padding. */
160 unsigned header_byte_count;
161 /* The size of the header of the section without any padding. */
162 unsigned header_byte_delta;
164 void init (const char *name);
165 void release ();
166 void output ();
167 unsigned add (const void *data, unsigned len, void **output = NULL);
168 void round_size_up (int factor);
169 void *get_ptr_by_offset (unsigned int offset);
171 private:
172 void allocate_new_chunk ();
174 /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */
175 vec <struct hsa_brig_data_chunk> chunks;
177 /* More convenient access to the last chunk from the vector above. */
178 struct hsa_brig_data_chunk *cur_chunk;
181 static struct hsa_brig_section brig_data, brig_code, brig_operand;
182 static uint32_t brig_insn_count;
183 static bool brig_initialized = false;
185 /* Mapping between emitted HSA functions and their offset in code segment. */
186 static hash_map<tree, BrigCodeOffset32_t> *function_offsets;
188 /* Hash map of emitted function declarations. */
189 static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations;
191 /* Hash table of emitted internal function declaration offsets. */
192 hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls;
194 /* List of sbr instructions. */
195 static vec <hsa_insn_sbr *> *switch_instructions;
197 struct function_linkage_pair
199 function_linkage_pair (tree decl, unsigned int off)
200 : function_decl (decl), offset (off) {}
202 /* Declaration of called function. */
203 tree function_decl;
205 /* Offset in operand section. */
206 unsigned int offset;
209 /* Vector of function calls where we need to resolve function offsets. */
210 static auto_vec <function_linkage_pair> function_call_linkage;
212 /* Add a new chunk, allocate data for it and initialize it. */
214 void
215 hsa_brig_section::allocate_new_chunk ()
217 struct hsa_brig_data_chunk new_chunk;
219 new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE);
220 new_chunk.size = 0;
221 cur_chunk = chunks.safe_push (new_chunk);
224 /* Initialize the brig section. */
226 void
227 hsa_brig_section::init (const char *name)
229 section_name = name;
230 /* While the following computation is basically wrong, because the intent
231 certainly wasn't to have the first character of name and padding, which
232 are a part of sizeof (BrigSectionHeader), included in the first addend,
233 this is what the disassembler expects. */
234 total_size = sizeof (BrigSectionHeader) + strlen (section_name);
235 chunks.create (1);
236 allocate_new_chunk ();
237 header_byte_delta = total_size;
238 round_size_up (4);
239 header_byte_count = total_size;
242 /* Free all data in the section. */
244 void
245 hsa_brig_section::release ()
247 for (unsigned i = 0; i < chunks.length (); i++)
248 free (chunks[i].data);
249 chunks.release ();
250 cur_chunk = NULL;
253 /* Write the section to the output file to a section with the name given at
254 initialization. Switches the output section and does not restore it. */
256 void
257 hsa_brig_section::output ()
259 struct BrigSectionHeader section_header;
260 char padding[8];
262 section_header.byteCount = lendian64 (total_size);
263 section_header.headerByteCount = lendian32 (header_byte_count);
264 section_header.nameLength = lendian32 (strlen (section_name));
265 assemble_string ((const char *) &section_header, 16);
266 assemble_string (section_name, (section_header.nameLength));
267 memset (&padding, 0, sizeof (padding));
268 /* This is also a consequence of the wrong header size computation described
269 in a comment in hsa_brig_section::init. */
270 assemble_string (padding, 8);
271 for (unsigned i = 0; i < chunks.length (); i++)
272 assemble_string (chunks[i].data, chunks[i].size);
275 /* Add to the stream LEN bytes of opaque binary DATA. Return the offset at
276 which it was stored. If OUTPUT is not NULL, store into it the pointer to
277 the place where DATA was actually stored. */
279 unsigned
280 hsa_brig_section::add (const void *data, unsigned len, void **output)
282 unsigned offset = total_size;
284 gcc_assert (len <= BRIG_CHUNK_MAX_SIZE);
285 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len))
286 allocate_new_chunk ();
288 char *dst = cur_chunk->data + cur_chunk->size;
289 memcpy (dst, data, len);
290 if (output)
291 *output = dst;
292 cur_chunk->size += len;
293 total_size += len;
295 return offset;
298 /* Add padding to section so that its size is divisible by FACTOR. */
300 void
301 hsa_brig_section::round_size_up (int factor)
303 unsigned padding, res = total_size % factor;
305 if (res == 0)
306 return;
308 padding = factor - res;
309 total_size += padding;
310 if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding))
312 padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size;
313 cur_chunk->size = BRIG_CHUNK_MAX_SIZE;
314 allocate_new_chunk ();
317 cur_chunk->size += padding;
320 /* Return pointer to data by global OFFSET in the section. */
322 void *
323 hsa_brig_section::get_ptr_by_offset (unsigned int offset)
325 gcc_assert (offset < total_size);
326 offset -= header_byte_delta;
328 unsigned i;
329 for (i = 0; offset >= chunks[i].size; i++)
330 offset -= chunks[i].size;
332 return chunks[i].data + offset;
/* BRIG string data hashing.  One slot describes a string that has already
   been emitted to the data section.  */

struct brig_string_slot
{
  /* The string characters, without the prefix.  */
  const char *s;
  /* Prefix character emitted before the string, or zero if there is none.  */
  char prefix;
  /* Length of S, not counting the prefix.  */
  int len;
  /* Offset of the emitted string in the data section.  */
  uint32_t offset;
};
345 /* Hash table helpers. */
347 struct brig_string_slot_hasher : pointer_hash <brig_string_slot>
349 static inline hashval_t hash (const value_type);
350 static inline bool equal (const value_type, const compare_type);
351 static inline void remove (value_type);
354 /* Returns a hash code for DS. Adapted from libiberty's htab_hash_string
355 to support strings that may not end in '\0'. */
357 inline hashval_t
358 brig_string_slot_hasher::hash (const value_type ds)
360 hashval_t r = ds->len;
361 int i;
363 for (i = 0; i < ds->len; i++)
364 r = r * 67 + (unsigned) ds->s[i] - 113;
365 r = r * 67 + (unsigned) ds->prefix - 113;
366 return r;
369 /* Returns nonzero if DS1 and DS2 are equal. */
371 inline bool
372 brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2)
374 if (ds1->len == ds2->len)
375 return ds1->prefix == ds2->prefix
376 && memcmp (ds1->s, ds2->s, ds1->len) == 0;
378 return 0;
381 /* Deallocate memory for DS upon its removal. */
383 inline void
384 brig_string_slot_hasher::remove (value_type ds)
386 free (const_cast<char *> (ds->s));
387 free (ds);
390 /* Hash for strings we output in order not to duplicate them needlessly. */
392 static hash_table<brig_string_slot_hasher> *brig_string_htab;
394 /* Emit a null terminated string STR to the data section and return its
395 offset in it. If PREFIX is non-zero, output it just before STR too.
396 Sanitize the string if SANITIZE option is set to true. */
398 static unsigned
399 brig_emit_string (const char *str, char prefix = 0, bool sanitize = true)
401 unsigned slen = strlen (str);
402 unsigned offset, len = slen + (prefix ? 1 : 0);
403 uint32_t hdr_len = lendian32 (len);
404 brig_string_slot s_slot;
405 brig_string_slot **slot;
406 char *str2;
408 str2 = xstrdup (str);
410 if (sanitize)
411 hsa_sanitize_name (str2);
412 s_slot.s = str2;
413 s_slot.len = slen;
414 s_slot.prefix = prefix;
415 s_slot.offset = 0;
417 slot = brig_string_htab->find_slot (&s_slot, INSERT);
418 if (*slot == NULL)
420 brig_string_slot *new_slot = XCNEW (brig_string_slot);
422 /* In theory we should fill in BrigData but that would mean copying
423 the string to a buffer for no reason, so we just emulate it. */
424 offset = brig_data.add (&hdr_len, sizeof (hdr_len));
425 if (prefix)
426 brig_data.add (&prefix, 1);
428 brig_data.add (str2, slen);
429 brig_data.round_size_up (4);
431 /* TODO: could use the string we just copied into
432 brig_string->cur_chunk */
433 new_slot->s = str2;
434 new_slot->len = slen;
435 new_slot->prefix = prefix;
436 new_slot->offset = offset;
437 *slot = new_slot;
439 else
441 offset = (*slot)->offset;
442 free (str2);
445 return offset;
448 /* Linked list of queued operands. */
450 static struct operand_queue
452 /* First from the chain of queued operands. */
453 hsa_op_base *first_op, *last_op;
455 /* The offset at which the next operand will be enqueued. */
456 unsigned projected_size;
458 } op_queue;
460 /* Unless already initialized, initialize infrastructure to produce BRIG. */
462 static void
463 brig_init (void)
465 brig_insn_count = 0;
467 if (brig_initialized)
468 return;
470 brig_string_htab = new hash_table<brig_string_slot_hasher> (37);
471 brig_data.init (BRIG_SECTION_DATA_NAME);
472 brig_code.init (BRIG_SECTION_CODE_NAME);
473 brig_operand.init (BRIG_SECTION_OPERAND_NAME);
474 brig_initialized = true;
476 struct BrigDirectiveModule moddir;
477 memset (&moddir, 0, sizeof (moddir));
478 moddir.base.byteCount = lendian16 (sizeof (moddir));
480 char *modname;
481 if (main_input_filename && *main_input_filename != '\0')
483 const char *part = strrchr (main_input_filename, '/');
484 if (!part)
485 part = main_input_filename;
486 else
487 part++;
488 modname = concat ("&__hsa_module_", part, NULL);
489 char *extension = strchr (modname, '.');
490 if (extension)
491 *extension = '\0';
493 /* As in LTO mode, we have to emit a different module names. */
494 if (flag_ltrans)
496 part = strrchr (asm_file_name, '/');
497 if (!part)
498 part = asm_file_name;
499 else
500 part++;
501 char *modname2;
502 asprintf (&modname2, "%s_%s", modname, part);
503 free (modname);
504 modname = modname2;
507 hsa_sanitize_name (modname);
508 moddir.name = brig_emit_string (modname);
509 free (modname);
511 else
512 moddir.name = brig_emit_string ("__hsa_module_unnamed", '&');
513 moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE);
514 moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR);
515 moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR);
516 moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
517 if (hsa_machine_large_p ())
518 moddir.machineModel = BRIG_MACHINE_LARGE;
519 else
520 moddir.machineModel = BRIG_MACHINE_SMALL;
521 moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
522 brig_code.add (&moddir, sizeof (moddir));
525 /* Free all BRIG data. */
527 static void
528 brig_release_data (void)
530 delete brig_string_htab;
531 brig_data.release ();
532 brig_code.release ();
533 brig_operand.release ();
535 brig_initialized = 0;
538 /* Enqueue operation OP. Return the offset at which it will be stored. */
540 static unsigned int
541 enqueue_op (hsa_op_base *op)
543 unsigned ret;
545 if (op->m_brig_op_offset)
546 return op->m_brig_op_offset;
548 ret = op_queue.projected_size;
549 op->m_brig_op_offset = op_queue.projected_size;
551 if (!op_queue.first_op)
552 op_queue.first_op = op;
553 else
554 op_queue.last_op->m_next = op;
555 op_queue.last_op = op;
557 if (is_a <hsa_op_immed *> (op))
558 op_queue.projected_size += sizeof (struct BrigOperandConstantBytes);
559 else if (is_a <hsa_op_reg *> (op))
560 op_queue.projected_size += sizeof (struct BrigOperandRegister);
561 else if (is_a <hsa_op_address *> (op))
562 op_queue.projected_size += sizeof (struct BrigOperandAddress);
563 else if (is_a <hsa_op_code_ref *> (op))
564 op_queue.projected_size += sizeof (struct BrigOperandCodeRef);
565 else if (is_a <hsa_op_code_list *> (op))
566 op_queue.projected_size += sizeof (struct BrigOperandCodeList);
567 else if (is_a <hsa_op_operand_list *> (op))
568 op_queue.projected_size += sizeof (struct BrigOperandOperandList);
569 else
570 gcc_unreachable ();
571 return ret;
574 static void emit_immediate_operand (hsa_op_immed *imm);
576 /* Emit directive describing a symbol if it has not been emitted already.
577 Return the offset of the directive. */
579 static unsigned
580 emit_directive_variable (struct hsa_symbol *symbol)
582 struct BrigDirectiveVariable dirvar;
583 unsigned name_offset;
584 static unsigned res_name_offset;
586 if (symbol->m_directive_offset)
587 return symbol->m_directive_offset;
589 memset (&dirvar, 0, sizeof (dirvar));
590 dirvar.base.byteCount = lendian16 (sizeof (dirvar));
591 dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE);
592 dirvar.allocation = symbol->m_allocation;
594 char prefix = symbol->m_global_scope_p ? '&' : '%';
596 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL)
598 if (res_name_offset == 0)
599 res_name_offset = brig_emit_string (symbol->m_name, '%');
600 name_offset = res_name_offset;
602 else if (symbol->m_name)
603 name_offset = brig_emit_string (symbol->m_name, prefix);
604 else
606 char buf[64];
607 snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment),
608 symbol->m_name_number);
609 name_offset = brig_emit_string (buf, prefix);
612 dirvar.name = lendian32 (name_offset);
614 if (symbol->m_decl && TREE_CODE (symbol->m_decl) == CONST_DECL)
616 hsa_op_immed *tmp = new hsa_op_immed (DECL_INITIAL (symbol->m_decl));
617 dirvar.init = lendian32 (enqueue_op (tmp));
619 else
620 dirvar.init = 0;
621 dirvar.type = lendian16 (symbol->m_type);
622 dirvar.segment = symbol->m_segment;
623 dirvar.align = symbol->m_align;
624 dirvar.linkage = symbol->m_linkage;
625 dirvar.dim.lo = symbol->m_dim;
626 dirvar.dim.hi = symbol->m_dim >> 32;
628 /* Global variables are just declared and linked via HSA runtime. */
629 if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM)
630 dirvar.modifier |= BRIG_VARIABLE_DEFINITION;
631 dirvar.reserved = 0;
633 if (symbol->m_cst_value)
635 dirvar.modifier |= BRIG_VARIABLE_CONST;
636 dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value));
639 symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar));
640 return symbol->m_directive_offset;
643 /* Emit directives describing either a function declaration or definition F and
644 return the produced BrigDirectiveExecutable structure. The function does
645 not take into account any instructions when calculating nextModuleEntry
646 field of the produced BrigDirectiveExecutable structure so when emitting
647 actual definitions, this field needs to be updated after all of the function
648 is actually added to the code section. */
650 static BrigDirectiveExecutable *
651 emit_function_directives (hsa_function_representation *f, bool is_declaration)
653 struct BrigDirectiveExecutable fndir;
654 unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
655 int count = 0;
656 void *ptr_to_fndir;
657 hsa_symbol *sym;
659 if (!f->m_declaration_p)
660 for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++)
662 gcc_assert (!sym->m_emitted_to_brig);
663 sym->m_emitted_to_brig = true;
664 emit_directive_variable (sym);
665 brig_insn_count++;
668 name_offset = brig_emit_string (f->m_name, '&');
669 inarg_off = brig_code.total_size + sizeof (fndir)
670 + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
671 scoped_off = inarg_off
672 + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable);
674 if (!f->m_declaration_p)
676 count += f->m_spill_symbols.length ();
677 count += f->m_private_variables.length ();
680 next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable);
682 memset (&fndir, 0, sizeof (fndir));
683 fndir.base.byteCount = lendian16 (sizeof (fndir));
684 fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
685 : BRIG_KIND_DIRECTIVE_FUNCTION);
686 fndir.name = lendian32 (name_offset);
687 fndir.inArgCount = lendian16 (f->m_input_args.length ());
688 fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0);
689 fndir.firstInArg = lendian32 (inarg_off);
690 fndir.firstCodeBlockEntry = lendian32 (scoped_off);
691 fndir.nextModuleEntry = lendian32 (next_toplev_off);
692 fndir.linkage = f->get_linkage ();
693 if (!f->m_declaration_p)
694 fndir.modifier |= BRIG_EXECUTABLE_DEFINITION;
695 memset (&fndir.reserved, 0, sizeof (fndir.reserved));
697 /* Once we put a definition of function_offsets, we should not overwrite
698 it with a declaration of the function. */
699 if (f->m_internal_fn == NULL)
701 if (!function_offsets->get (f->m_decl) || !is_declaration)
702 function_offsets->put (f->m_decl, brig_code.total_size);
704 else
706 /* Internal function. */
707 hsa_internal_fn **slot
708 = hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT);
709 hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn);
710 int_fn->m_offset = brig_code.total_size;
711 *slot = int_fn;
714 brig_code.add (&fndir, sizeof (fndir), &ptr_to_fndir);
716 if (f->m_output_arg)
717 emit_directive_variable (f->m_output_arg);
718 for (unsigned i = 0; i < f->m_input_args.length (); i++)
719 emit_directive_variable (f->m_input_args[i]);
721 if (!f->m_declaration_p)
723 for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++)
725 emit_directive_variable (sym);
726 brig_insn_count++;
728 for (unsigned i = 0; i < f->m_private_variables.length (); i++)
730 emit_directive_variable (f->m_private_variables[i]);
731 brig_insn_count++;
735 return (BrigDirectiveExecutable *) ptr_to_fndir;
738 /* Emit a label directive for the given HBB. We assume it is about to start on
739 the current offset in the code section. */
741 static void
742 emit_bb_label_directive (hsa_bb *hbb)
744 struct BrigDirectiveLabel lbldir;
746 lbldir.base.byteCount = lendian16 (sizeof (lbldir));
747 lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL);
748 char buf[32];
749 snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl),
750 hbb->m_index);
751 lbldir.name = lendian32 (brig_emit_string (buf, '@'));
753 hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir,
754 sizeof (lbldir));
755 brig_insn_count++;
758 /* Map a normal HSAIL type to the type of the equivalent BRIG operand
759 holding such, for constants and registers. */
761 static BrigType16_t
762 regtype_for_type (BrigType16_t t)
764 switch (t)
766 case BRIG_TYPE_B1:
767 return BRIG_TYPE_B1;
769 case BRIG_TYPE_U8:
770 case BRIG_TYPE_U16:
771 case BRIG_TYPE_U32:
772 case BRIG_TYPE_S8:
773 case BRIG_TYPE_S16:
774 case BRIG_TYPE_S32:
775 case BRIG_TYPE_B8:
776 case BRIG_TYPE_B16:
777 case BRIG_TYPE_B32:
778 case BRIG_TYPE_F16:
779 case BRIG_TYPE_F32:
780 case BRIG_TYPE_U8X4:
781 case BRIG_TYPE_U16X2:
782 case BRIG_TYPE_S8X4:
783 case BRIG_TYPE_S16X2:
784 case BRIG_TYPE_F16X2:
785 return BRIG_TYPE_B32;
787 case BRIG_TYPE_U64:
788 case BRIG_TYPE_S64:
789 case BRIG_TYPE_F64:
790 case BRIG_TYPE_B64:
791 case BRIG_TYPE_U8X8:
792 case BRIG_TYPE_U16X4:
793 case BRIG_TYPE_U32X2:
794 case BRIG_TYPE_S8X8:
795 case BRIG_TYPE_S16X4:
796 case BRIG_TYPE_S32X2:
797 case BRIG_TYPE_F16X4:
798 case BRIG_TYPE_F32X2:
799 return BRIG_TYPE_B64;
801 case BRIG_TYPE_B128:
802 case BRIG_TYPE_U8X16:
803 case BRIG_TYPE_U16X8:
804 case BRIG_TYPE_U32X4:
805 case BRIG_TYPE_U64X2:
806 case BRIG_TYPE_S8X16:
807 case BRIG_TYPE_S16X8:
808 case BRIG_TYPE_S32X4:
809 case BRIG_TYPE_S64X2:
810 case BRIG_TYPE_F16X8:
811 case BRIG_TYPE_F32X4:
812 case BRIG_TYPE_F64X2:
813 return BRIG_TYPE_B128;
815 default:
816 gcc_unreachable ();
820 /* Return the length of the BRIG type TYPE that is going to be streamed out as
821 an immediate constant (so it must not be B1). */
823 unsigned
824 hsa_get_imm_brig_type_len (BrigType16_t type)
826 BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
827 BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;
829 switch (pack_type)
831 case BRIG_TYPE_PACK_NONE:
832 break;
833 case BRIG_TYPE_PACK_32:
834 return 4;
835 case BRIG_TYPE_PACK_64:
836 return 8;
837 case BRIG_TYPE_PACK_128:
838 return 16;
839 default:
840 gcc_unreachable ();
843 switch (base_type)
845 case BRIG_TYPE_U8:
846 case BRIG_TYPE_S8:
847 case BRIG_TYPE_B8:
848 return 1;
849 case BRIG_TYPE_U16:
850 case BRIG_TYPE_S16:
851 case BRIG_TYPE_F16:
852 case BRIG_TYPE_B16:
853 return 2;
854 case BRIG_TYPE_U32:
855 case BRIG_TYPE_S32:
856 case BRIG_TYPE_F32:
857 case BRIG_TYPE_B32:
858 return 4;
859 case BRIG_TYPE_U64:
860 case BRIG_TYPE_S64:
861 case BRIG_TYPE_F64:
862 case BRIG_TYPE_B64:
863 return 8;
864 case BRIG_TYPE_B128:
865 return 16;
866 default:
867 gcc_unreachable ();
871 /* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
872 If NEED_LEN is not equal to zero, shrink or extend the value
873 to NEED_LEN bytes. Return how many bytes were written. */
875 static int
876 emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len)
878 union hsa_bytes bytes;
880 memset (&bytes, 0, sizeof (bytes));
881 tree type = TREE_TYPE (value);
882 gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);
884 unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
885 if (INTEGRAL_TYPE_P (type)
886 || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST))
887 switch (data_len)
889 case 1:
890 bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
891 break;
892 case 2:
893 bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
894 break;
895 case 4:
896 bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
897 break;
898 case 8:
899 bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value);
900 break;
901 default:
902 gcc_unreachable ();
904 else if (SCALAR_FLOAT_TYPE_P (type))
906 if (data_len == 2)
908 sorry ("Support for HSA does not implement immediate 16 bit FPU "
909 "operands");
910 return 2;
912 unsigned int_len = GET_MODE_SIZE (TYPE_MODE (type));
913 /* There are always 32 bits in each long, no matter the size of
914 the hosts long. */
915 long tmp[6];
917 real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));
919 if (int_len == 4)
920 bytes.b32 = (uint32_t) tmp[0];
921 else
923 bytes.b64 = (uint64_t)(uint32_t) tmp[1];
924 bytes.b64 <<= 32;
925 bytes.b64 |= (uint32_t) tmp[0];
928 else
929 gcc_unreachable ();
931 int len;
932 if (need_len == 0)
933 len = data_len;
934 else
935 len = need_len;
937 memcpy (data, &bytes, len);
938 return len;
941 char *
942 hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size)
944 char *brig_repr;
945 *brig_repr_size = hsa_get_imm_brig_type_len (m_type);
947 if (m_tree_value != NULL_TREE)
949 /* Update brig_repr_size for special tree values. */
950 if (TREE_CODE (m_tree_value) == STRING_CST)
951 *brig_repr_size = TREE_STRING_LENGTH (m_tree_value);
952 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
953 *brig_repr_size
954 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value)));
956 unsigned total_len = *brig_repr_size;
958 /* As we can have a constructor with fewer elements, fill the memory
959 with zeros. */
960 brig_repr = XCNEWVEC (char, total_len);
961 char *p = brig_repr;
963 if (TREE_CODE (m_tree_value) == VECTOR_CST)
965 int i, num = VECTOR_CST_NELTS (m_tree_value);
966 for (i = 0; i < num; i++)
968 tree v = VECTOR_CST_ELT (m_tree_value, i);
969 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
970 total_len -= actual;
971 p += actual;
973 /* Vectors should have the exact size. */
974 gcc_assert (total_len == 0);
976 else if (TREE_CODE (m_tree_value) == STRING_CST)
977 memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value),
978 TREE_STRING_LENGTH (m_tree_value));
979 else if (TREE_CODE (m_tree_value) == COMPLEX_CST)
981 gcc_assert (total_len % 2 == 0);
982 unsigned actual;
983 actual
984 = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p,
985 total_len / 2);
987 gcc_assert (actual == total_len / 2);
988 p += actual;
990 actual
991 = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p,
992 total_len / 2);
993 gcc_assert (actual == total_len / 2);
995 else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
997 unsigned len = CONSTRUCTOR_NELTS (m_tree_value);
998 for (unsigned i = 0; i < len; i++)
1000 tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
1001 unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
1002 total_len -= actual;
1003 p += actual;
1006 else
1007 emit_immediate_scalar_to_buffer (m_tree_value, p, total_len);
1009 else
1011 hsa_bytes bytes;
1013 switch (*brig_repr_size)
1015 case 1:
1016 bytes.b8 = (uint8_t) m_int_value;
1017 break;
1018 case 2:
1019 bytes.b16 = (uint16_t) m_int_value;
1020 break;
1021 case 4:
1022 bytes.b32 = (uint32_t) m_int_value;
1023 break;
1024 case 8:
1025 bytes.b64 = (uint64_t) m_int_value;
1026 break;
1027 default:
1028 gcc_unreachable ();
1031 brig_repr = XNEWVEC (char, *brig_repr_size);
1032 memcpy (brig_repr, &bytes, *brig_repr_size);
1035 return brig_repr;
1038 /* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might
1039 have been massaged to comply with various HSA/BRIG type requirements, so the
1040 only important aspect of that is the length (because HSAIL might expect
1041 smaller constants or become bit-data). The data should be represented
1042 according to what is in the tree representation. */
1044 static void
1045 emit_immediate_operand (hsa_op_immed *imm)
1047 unsigned brig_repr_size;
1048 char *brig_repr = imm->emit_to_buffer (&brig_repr_size);
1049 struct BrigOperandConstantBytes out;
1051 memset (&out, 0, sizeof (out));
1052 out.base.byteCount = lendian16 (sizeof (out));
1053 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
1054 uint32_t byteCount = lendian32 (brig_repr_size);
1055 out.type = lendian16 (imm->m_type);
1056 out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1057 brig_operand.add (&out, sizeof (out));
1058 brig_data.add (brig_repr, brig_repr_size);
1059 brig_data.round_size_up (4);
1061 free (brig_repr);
1064 /* Emit a register BRIG operand REG. */
1066 static void
1067 emit_register_operand (hsa_op_reg *reg)
1069 struct BrigOperandRegister out;
1071 out.base.byteCount = lendian16 (sizeof (out));
1072 out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER);
1073 out.regNum = lendian32 (reg->m_hard_num);
1075 switch (regtype_for_type (reg->m_type))
1077 case BRIG_TYPE_B32:
1078 out.regKind = BRIG_REGISTER_KIND_SINGLE;
1079 break;
1080 case BRIG_TYPE_B64:
1081 out.regKind = BRIG_REGISTER_KIND_DOUBLE;
1082 break;
1083 case BRIG_TYPE_B128:
1084 out.regKind = BRIG_REGISTER_KIND_QUAD;
1085 break;
1086 case BRIG_TYPE_B1:
1087 out.regKind = BRIG_REGISTER_KIND_CONTROL;
1088 break;
1089 default:
1090 gcc_unreachable ();
1093 brig_operand.add (&out, sizeof (out));
1096 /* Emit an address BRIG operand ADDR. */
1098 static void
1099 emit_address_operand (hsa_op_address *addr)
1101 struct BrigOperandAddress out;
1103 out.base.byteCount = lendian16 (sizeof (out));
1104 out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS);
1105 out.symbol = addr->m_symbol
1106 ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0;
1107 out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0;
1109 if (sizeof (addr->m_imm_offset) == 8)
1111 out.offset.lo = lendian32 (addr->m_imm_offset);
1112 out.offset.hi = lendian32 (addr->m_imm_offset >> 32);
1114 else
1116 gcc_assert (sizeof (addr->m_imm_offset) == 4);
1117 out.offset.lo = lendian32 (addr->m_imm_offset);
1118 out.offset.hi = 0;
1121 brig_operand.add (&out, sizeof (out));
1124 /* Emit a code reference operand REF. */
1126 static void
1127 emit_code_ref_operand (hsa_op_code_ref *ref)
1129 struct BrigOperandCodeRef out;
1131 out.base.byteCount = lendian16 (sizeof (out));
1132 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF);
1133 out.ref = lendian32 (ref->m_directive_offset);
1134 brig_operand.add (&out, sizeof (out));
1137 /* Emit a code list operand CODE_LIST. */
1139 static void
1140 emit_code_list_operand (hsa_op_code_list *code_list)
1142 struct BrigOperandCodeList out;
1143 unsigned args = code_list->m_offsets.length ();
1145 for (unsigned i = 0; i < args; i++)
1146 gcc_assert (code_list->m_offsets[i]);
1148 out.base.byteCount = lendian16 (sizeof (out));
1149 out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST);
1151 uint32_t byteCount = lendian32 (4 * args);
1153 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1154 brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t));
1155 brig_data.round_size_up (4);
1156 brig_operand.add (&out, sizeof (out));
1159 /* Emit an operand list operand OPERAND_LIST. */
1161 static void
1162 emit_operand_list_operand (hsa_op_operand_list *operand_list)
1164 struct BrigOperandOperandList out;
1165 unsigned args = operand_list->m_offsets.length ();
1167 for (unsigned i = 0; i < args; i++)
1168 gcc_assert (operand_list->m_offsets[i]);
1170 out.base.byteCount = lendian16 (sizeof (out));
1171 out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST);
1173 uint32_t byteCount = lendian32 (4 * args);
1175 out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount)));
1176 brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t));
1177 brig_data.round_size_up (4);
1178 brig_operand.add (&out, sizeof (out));
1181 /* Emit all operands queued for writing. */
1183 static void
1184 emit_queued_operands (void)
1186 for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next)
1188 gcc_assert (op->m_brig_op_offset == brig_operand.total_size);
1189 if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op))
1190 emit_immediate_operand (imm);
1191 else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
1192 emit_register_operand (reg);
1193 else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op))
1194 emit_address_operand (addr);
1195 else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op))
1196 emit_code_ref_operand (ref);
1197 else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op))
1198 emit_code_list_operand (code_list);
1199 else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op))
1200 emit_operand_list_operand (l);
1201 else
1202 gcc_unreachable ();
1206 /* Emit directives describing the function that is used for
1207 a function declaration. */
1209 static BrigDirectiveExecutable *
1210 emit_function_declaration (tree decl)
1212 hsa_function_representation *f = hsa_generate_function_declaration (decl);
1214 BrigDirectiveExecutable *e = emit_function_directives (f, true);
1215 emit_queued_operands ();
1217 delete f;
1219 return e;
1222 /* Emit directives describing the function that is used for
1223 an internal function declaration. */
1225 static BrigDirectiveExecutable *
1226 emit_internal_fn_decl (hsa_internal_fn *fn)
1228 hsa_function_representation *f = hsa_generate_internal_fn_decl (fn);
1230 BrigDirectiveExecutable *e = emit_function_directives (f, true);
1231 emit_queued_operands ();
1233 delete f;
1235 return e;
1238 /* Enqueue all operands of INSN and return offset to BRIG data section
1239 to list of operand offsets. */
1241 static unsigned
1242 emit_insn_operands (hsa_insn_basic *insn)
1244 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1245 operand_offsets;
1247 unsigned l = insn->operand_count ();
1249 /* We have N operands so use 4 * N for the byte_count. */
1250 uint32_t byte_count = lendian32 (4 * l);
1251 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1252 if (l > 0)
1254 operand_offsets.safe_grow (l);
1255 for (unsigned i = 0; i < l; i++)
1256 operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i)));
1258 brig_data.add (operand_offsets.address (),
1259 l * sizeof (BrigOperandOffset32_t));
1261 brig_data.round_size_up (4);
1262 return offset;
1265 /* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
1266 to BRIG data section to list of operand offsets. */
1268 static unsigned
1269 emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL,
1270 hsa_op_base *op2 = NULL)
1272 auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
1273 operand_offsets;
1275 gcc_checking_assert (op0 != NULL);
1276 operand_offsets.safe_push (enqueue_op (op0));
1278 if (op1 != NULL)
1280 operand_offsets.safe_push (enqueue_op (op1));
1281 if (op2 != NULL)
1282 operand_offsets.safe_push (enqueue_op (op2));
1285 unsigned l = operand_offsets.length ();
1287 /* We have N operands so use 4 * N for the byte_count. */
1288 uint32_t byte_count = lendian32 (4 * l);
1290 unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
1291 brig_data.add (operand_offsets.address (),
1292 l * sizeof (BrigOperandOffset32_t));
1294 brig_data.round_size_up (4);
1296 return offset;
1299 /* Emit an HSA memory instruction and all necessary directives, schedule
1300 necessary operands for writing. */
1302 static void
1303 emit_memory_insn (hsa_insn_mem *mem)
1305 struct BrigInstMem repr;
1306 gcc_checking_assert (mem->operand_count () == 2);
1308 hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1));
1310 /* This is necessary because of the erroneous typedef of
1311 BrigMemoryModifier8_t which introduces padding which may then contain
1312 random stuff (which we do not want so that we can test things don't
1313 change). */
1314 memset (&repr, 0, sizeof (repr));
1315 repr.base.base.byteCount = lendian16 (sizeof (repr));
1316 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1317 repr.base.opcode = lendian16 (mem->m_opcode);
1318 repr.base.type = lendian16 (mem->m_type);
1319 repr.base.operands = lendian32 (emit_insn_operands (mem));
1321 if (addr->m_symbol)
1322 repr.segment = addr->m_symbol->m_segment;
1323 else
1324 repr.segment = BRIG_SEGMENT_FLAT;
1325 repr.modifier = 0;
1326 repr.equivClass = mem->m_equiv_class;
1327 repr.align = mem->m_align;
1328 if (mem->m_opcode == BRIG_OPCODE_LD)
1329 repr.width = BRIG_WIDTH_1;
1330 else
1331 repr.width = BRIG_WIDTH_NONE;
1332 memset (&repr.reserved, 0, sizeof (repr.reserved));
1333 brig_code.add (&repr, sizeof (repr));
1334 brig_insn_count++;
1337 /* Emit an HSA signal memory instruction and all necessary directives, schedule
1338 necessary operands for writing. */
1340 static void
1341 emit_signal_insn (hsa_insn_signal *mem)
1343 struct BrigInstSignal repr;
1345 memset (&repr, 0, sizeof (repr));
1346 repr.base.base.byteCount = lendian16 (sizeof (repr));
1347 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL);
1348 repr.base.opcode = lendian16 (mem->m_opcode);
1349 repr.base.type = lendian16 (mem->m_type);
1350 repr.base.operands = lendian32 (emit_insn_operands (mem));
1352 repr.memoryOrder = mem->m_memory_order;
1353 repr.signalOperation = mem->m_signalop;
1354 repr.signalType = hsa_machine_large_p () ? BRIG_TYPE_SIG64 : BRIG_TYPE_SIG32;
1356 brig_code.add (&repr, sizeof (repr));
1357 brig_insn_count++;
1360 /* Emit an HSA atomic memory instruction and all necessary directives, schedule
1361 necessary operands for writing. */
1363 static void
1364 emit_atomic_insn (hsa_insn_atomic *mem)
1366 struct BrigInstAtomic repr;
1368 /* Either operand[0] or operand[1] must be an address operand. */
1369 hsa_op_address *addr = NULL;
1370 if (is_a <hsa_op_address *> (mem->get_op (0)))
1371 addr = as_a <hsa_op_address *> (mem->get_op (0));
1372 else
1373 addr = as_a <hsa_op_address *> (mem->get_op (1));
1375 memset (&repr, 0, sizeof (repr));
1376 repr.base.base.byteCount = lendian16 (sizeof (repr));
1377 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC);
1378 repr.base.opcode = lendian16 (mem->m_opcode);
1379 repr.base.type = lendian16 (mem->m_type);
1380 repr.base.operands = lendian32 (emit_insn_operands (mem));
1382 if (addr->m_symbol)
1383 repr.segment = addr->m_symbol->m_segment;
1384 else
1385 repr.segment = BRIG_SEGMENT_FLAT;
1386 repr.memoryOrder = mem->m_memoryorder;
1387 repr.memoryScope = mem->m_memoryscope;
1388 repr.atomicOperation = mem->m_atomicop;
1390 brig_code.add (&repr, sizeof (repr));
1391 brig_insn_count++;
1394 /* Emit an HSA LDA instruction and all necessary directives, schedule
1395 necessary operands for writing. */
1397 static void
1398 emit_addr_insn (hsa_insn_basic *insn)
1400 struct BrigInstAddr repr;
1402 hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1));
1404 repr.base.base.byteCount = lendian16 (sizeof (repr));
1405 repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR);
1406 repr.base.opcode = lendian16 (insn->m_opcode);
1407 repr.base.type = lendian16 (insn->m_type);
1408 repr.base.operands = lendian32 (emit_insn_operands (insn));
1410 if (addr->m_symbol)
1411 repr.segment = addr->m_symbol->m_segment;
1412 else
1413 repr.segment = BRIG_SEGMENT_FLAT;
1414 memset (&repr.reserved, 0, sizeof (repr.reserved));
1416 brig_code.add (&repr, sizeof (repr));
1417 brig_insn_count++;
1420 /* Emit an HSA segment conversion instruction and all necessary directives,
1421 schedule necessary operands for writing. */
1423 static void
1424 emit_segment_insn (hsa_insn_seg *seg)
1426 struct BrigInstSegCvt repr;
1428 repr.base.base.byteCount = lendian16 (sizeof (repr));
1429 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT);
1430 repr.base.opcode = lendian16 (seg->m_opcode);
1431 repr.base.type = lendian16 (seg->m_type);
1432 repr.base.operands = lendian32 (emit_insn_operands (seg));
1433 repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type);
1434 repr.segment = seg->m_segment;
1435 repr.modifier = 0;
1437 brig_code.add (&repr, sizeof (repr));
1439 brig_insn_count++;
1442 /* Emit an HSA alloca instruction and all necessary directives,
1443 schedule necessary operands for writing. */
1445 static void
1446 emit_alloca_insn (hsa_insn_alloca *alloca)
1448 struct BrigInstMem repr;
1449 gcc_checking_assert (alloca->operand_count () == 2);
1451 memset (&repr, 0, sizeof (repr));
1452 repr.base.base.byteCount = lendian16 (sizeof (repr));
1453 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
1454 repr.base.opcode = lendian16 (alloca->m_opcode);
1455 repr.base.type = lendian16 (alloca->m_type);
1456 repr.base.operands = lendian32 (emit_insn_operands (alloca));
1457 repr.segment = BRIG_SEGMENT_PRIVATE;
1458 repr.modifier = 0;
1459 repr.equivClass = 0;
1460 repr.align = alloca->m_align;
1461 repr.width = BRIG_WIDTH_NONE;
1462 memset (&repr.reserved, 0, sizeof (repr.reserved));
1463 brig_code.add (&repr, sizeof (repr));
1464 brig_insn_count++;
1467 /* Emit an HSA comparison instruction and all necessary directives,
1468 schedule necessary operands for writing. */
1470 static void
1471 emit_cmp_insn (hsa_insn_cmp *cmp)
1473 struct BrigInstCmp repr;
1475 memset (&repr, 0, sizeof (repr));
1476 repr.base.base.byteCount = lendian16 (sizeof (repr));
1477 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP);
1478 repr.base.opcode = lendian16 (cmp->m_opcode);
1479 repr.base.type = lendian16 (cmp->m_type);
1480 repr.base.operands = lendian32 (emit_insn_operands (cmp));
1482 if (is_a <hsa_op_reg *> (cmp->get_op (1)))
1483 repr.sourceType
1484 = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type);
1485 else
1486 repr.sourceType
1487 = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type);
1488 repr.modifier = 0;
1489 repr.compare = cmp->m_compare;
1490 repr.pack = 0;
1492 brig_code.add (&repr, sizeof (repr));
1493 brig_insn_count++;
1496 /* Emit an HSA generic branching/sycnronization instruction. */
1498 static void
1499 emit_generic_branch_insn (hsa_insn_br *br)
1501 struct BrigInstBr repr;
1502 repr.base.base.byteCount = lendian16 (sizeof (repr));
1503 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1504 repr.base.opcode = lendian16 (br->m_opcode);
1505 repr.width = br->m_width;
1506 repr.base.type = lendian16 (br->m_type);
1507 repr.base.operands = lendian32 (emit_insn_operands (br));
1508 memset (&repr.reserved, 0, sizeof (repr.reserved));
1510 brig_code.add (&repr, sizeof (repr));
1511 brig_insn_count++;
1514 /* Emit an HSA conditional branching instruction and all necessary directives,
1515 schedule necessary operands for writing. */
1517 static void
1518 emit_cond_branch_insn (hsa_insn_cbr *br)
1520 struct BrigInstBr repr;
1522 basic_block target = NULL;
1523 edge_iterator ei;
1524 edge e;
1526 /* At the moment we only handle direct conditional jumps. */
1527 gcc_assert (br->m_opcode == BRIG_OPCODE_CBR);
1528 repr.base.base.byteCount = lendian16 (sizeof (repr));
1529 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1530 repr.base.opcode = lendian16 (br->m_opcode);
1531 repr.width = br->m_width;
1532 /* For Conditional jumps the type is always B1. */
1533 repr.base.type = lendian16 (BRIG_TYPE_B1);
1535 FOR_EACH_EDGE (e, ei, br->m_bb->succs)
1536 if (e->flags & EDGE_TRUE_VALUE)
1538 target = e->dest;
1539 break;
1541 gcc_assert (target);
1543 repr.base.operands
1544 = lendian32 (emit_operands (br->get_op (0),
1545 &hsa_bb_for_bb (target)->m_label_ref));
1546 memset (&repr.reserved, 0, sizeof (repr.reserved));
1548 brig_code.add (&repr, sizeof (repr));
1549 brig_insn_count++;
1552 /* Emit an HSA unconditional jump branching instruction that points to
1553 a label REFERENCE. */
1555 static void
1556 emit_unconditional_jump (hsa_op_code_ref *reference)
1558 struct BrigInstBr repr;
1560 repr.base.base.byteCount = lendian16 (sizeof (repr));
1561 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1562 repr.base.opcode = lendian16 (BRIG_OPCODE_BR);
1563 repr.base.type = lendian16 (BRIG_TYPE_NONE);
1564 /* Direct branches to labels must be width(all). */
1565 repr.width = BRIG_WIDTH_ALL;
1567 repr.base.operands = lendian32 (emit_operands (reference));
1568 memset (&repr.reserved, 0, sizeof (repr.reserved));
1569 brig_code.add (&repr, sizeof (repr));
1570 brig_insn_count++;
1573 /* Emit an HSA switch jump instruction that uses a jump table to
1574 jump to a destination label. */
1576 static void
1577 emit_switch_insn (hsa_insn_sbr *sbr)
1579 struct BrigInstBr repr;
1581 gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR);
1582 repr.base.base.byteCount = lendian16 (sizeof (repr));
1583 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1584 repr.base.opcode = lendian16 (sbr->m_opcode);
1585 repr.width = BRIG_WIDTH_1;
1586 /* For Conditional jumps the type is always B1. */
1587 hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0));
1588 repr.base.type = lendian16 (index->m_type);
1589 repr.base.operands
1590 = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list));
1591 memset (&repr.reserved, 0, sizeof (repr.reserved));
1593 brig_code.add (&repr, sizeof (repr));
1594 brig_insn_count++;
1597 /* Emit a HSA convert instruction and all necessary directives, schedule
1598 necessary operands for writing. */
1600 static void
1601 emit_cvt_insn (hsa_insn_cvt *insn)
1603 struct BrigInstCvt repr;
1604 BrigType16_t srctype;
1606 repr.base.base.byteCount = lendian16 (sizeof (repr));
1607 repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT);
1608 repr.base.opcode = lendian16 (insn->m_opcode);
1609 repr.base.type = lendian16 (insn->m_type);
1610 repr.base.operands = lendian32 (emit_insn_operands (insn));
1612 if (is_a <hsa_op_reg *> (insn->get_op (1)))
1613 srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type;
1614 else
1615 srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type;
1616 repr.sourceType = lendian16 (srctype);
1617 repr.modifier = 0;
1618 /* float to smaller float requires a rounding setting (we default
1619 to 'near'. */
1620 if (hsa_type_float_p (insn->m_type)
1621 && (!hsa_type_float_p (srctype)
1622 || ((insn->m_type & BRIG_TYPE_BASE_MASK)
1623 < (srctype & BRIG_TYPE_BASE_MASK))))
1624 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1625 else if (hsa_type_integer_p (insn->m_type) &&
1626 hsa_type_float_p (srctype))
1627 repr.round = BRIG_ROUND_INTEGER_ZERO;
1628 else
1629 repr.round = BRIG_ROUND_NONE;
1630 brig_code.add (&repr, sizeof (repr));
1631 brig_insn_count++;
1634 /* Emit call instruction INSN, where this instruction must be closed
1635 within a call block instruction. */
1637 static void
1638 emit_call_insn (hsa_insn_call *call)
1640 struct BrigInstBr repr;
1642 repr.base.base.byteCount = lendian16 (sizeof (repr));
1643 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
1644 repr.base.opcode = lendian16 (BRIG_OPCODE_CALL);
1645 repr.base.type = lendian16 (BRIG_TYPE_NONE);
1647 repr.base.operands
1648 = lendian32 (emit_operands (call->m_result_code_list, &call->m_func,
1649 call->m_args_code_list));
1651 /* Internal functions have not set m_called_function. */
1652 if (call->m_called_function)
1654 function_linkage_pair pair (call->m_called_function,
1655 call->m_func.m_brig_op_offset);
1656 function_call_linkage.safe_push (pair);
1658 else
1660 hsa_internal_fn *slot
1661 = hsa_emitted_internal_decls->find (call->m_called_internal_fn);
1662 gcc_assert (slot);
1663 gcc_assert (slot->m_offset > 0);
1664 call->m_func.m_directive_offset = slot->m_offset;
1667 repr.width = BRIG_WIDTH_ALL;
1668 memset (&repr.reserved, 0, sizeof (repr.reserved));
1670 brig_code.add (&repr, sizeof (repr));
1671 brig_insn_count++;
1674 /* Emit argument block directive. */
1676 static void
1677 emit_arg_block_insn (hsa_insn_arg_block *insn)
1679 switch (insn->m_kind)
1681 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
1683 struct BrigDirectiveArgBlock repr;
1684 repr.base.byteCount = lendian16 (sizeof (repr));
1685 repr.base.kind = lendian16 (insn->m_kind);
1686 brig_code.add (&repr, sizeof (repr));
1688 for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++)
1690 insn->m_call_insn->m_args_code_list->m_offsets[i]
1691 = lendian32 (emit_directive_variable
1692 (insn->m_call_insn->m_input_args[i]));
1693 brig_insn_count++;
1696 if (insn->m_call_insn->m_output_arg)
1698 insn->m_call_insn->m_result_code_list->m_offsets[0]
1699 = lendian32 (emit_directive_variable
1700 (insn->m_call_insn->m_output_arg));
1701 brig_insn_count++;
1704 break;
1706 case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
1708 struct BrigDirectiveArgBlock repr;
1709 repr.base.byteCount = lendian16 (sizeof (repr));
1710 repr.base.kind = lendian16 (insn->m_kind);
1711 brig_code.add (&repr, sizeof (repr));
1712 break;
1714 default:
1715 gcc_unreachable ();
1718 brig_insn_count++;
1721 /* Emit comment directive. */
1723 static void
1724 emit_comment_insn (hsa_insn_comment *insn)
1726 struct BrigDirectiveComment repr;
1727 memset (&repr, 0, sizeof (repr));
1729 repr.base.byteCount = lendian16 (sizeof (repr));
1730 repr.base.kind = lendian16 (insn->m_opcode);
1731 repr.name = brig_emit_string (insn->m_comment, '\0', false);
1732 brig_code.add (&repr, sizeof (repr));
1735 /* Emit queue instruction INSN. */
1737 static void
1738 emit_queue_insn (hsa_insn_queue *insn)
1740 BrigInstQueue repr;
1741 memset (&repr, 0, sizeof (repr));
1743 repr.base.base.byteCount = lendian16 (sizeof (repr));
1744 repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE);
1745 repr.base.opcode = lendian16 (insn->m_opcode);
1746 repr.base.type = lendian16 (insn->m_type);
1747 repr.segment = insn->m_segment;
1748 repr.memoryOrder = insn->m_memory_order;
1749 repr.base.operands = lendian32 (emit_insn_operands (insn));
1750 brig_data.round_size_up (4);
1751 brig_code.add (&repr, sizeof (repr));
1753 brig_insn_count++;
1756 /* Emit source type instruction INSN. */
1758 static void
1759 emit_srctype_insn (hsa_insn_srctype *insn)
1761 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1762 struct BrigInstSourceType repr;
1763 unsigned operand_count = insn->operand_count ();
1764 gcc_checking_assert (operand_count >= 2);
1766 memset (&repr, 0, sizeof (repr));
1767 repr.sourceType = lendian16 (insn->m_source_type);
1768 repr.base.base.byteCount = lendian16 (sizeof (repr));
1769 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1770 repr.base.opcode = lendian16 (insn->m_opcode);
1771 repr.base.type = lendian16 (insn->m_type);
1773 repr.base.operands = lendian32 (emit_insn_operands (insn));
1774 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1775 brig_insn_count++;
1778 /* Emit packed instruction INSN. */
1780 static void
1781 emit_packed_insn (hsa_insn_packed *insn)
1783 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1784 struct BrigInstSourceType repr;
1785 unsigned operand_count = insn->operand_count ();
1786 gcc_checking_assert (operand_count >= 2);
1788 memset (&repr, 0, sizeof (repr));
1789 repr.sourceType = lendian16 (insn->m_source_type);
1790 repr.base.base.byteCount = lendian16 (sizeof (repr));
1791 repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
1792 repr.base.opcode = lendian16 (insn->m_opcode);
1793 repr.base.type = lendian16 (insn->m_type);
1795 if (insn->m_opcode == BRIG_OPCODE_COMBINE)
1797 /* Create operand list for packed type. */
1798 for (unsigned i = 1; i < operand_count; i++)
1800 gcc_checking_assert (insn->get_op (i));
1801 insn->m_operand_list->m_offsets[i - 1]
1802 = lendian32 (enqueue_op (insn->get_op (i)));
1805 repr.base.operands = lendian32 (emit_operands (insn->get_op (0),
1806 insn->m_operand_list));
1808 else if (insn->m_opcode == BRIG_OPCODE_EXPAND)
1810 /* Create operand list for packed type. */
1811 for (unsigned i = 0; i < operand_count - 1; i++)
1813 gcc_checking_assert (insn->get_op (i));
1814 insn->m_operand_list->m_offsets[i]
1815 = lendian32 (enqueue_op (insn->get_op (i)));
1818 unsigned ops = emit_operands (insn->m_operand_list,
1819 insn->get_op (insn->operand_count () - 1));
1820 repr.base.operands = lendian32 (ops);
1824 brig_code.add (&repr, sizeof (struct BrigInstSourceType));
1825 brig_insn_count++;
1828 /* Emit a basic HSA instruction and all necessary directives, schedule
1829 necessary operands for writing. */
1831 static void
1832 emit_basic_insn (hsa_insn_basic *insn)
1834 /* We assume that BrigInstMod has a BrigInstBasic prefix. */
1835 struct BrigInstMod repr;
1836 BrigType16_t type;
1838 memset (&repr, 0, sizeof (repr));
1839 repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic));
1840 repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC);
1841 repr.base.opcode = lendian16 (insn->m_opcode);
1842 switch (insn->m_opcode)
1844 /* And the bit-logical operations need bit types and whine about
1845 arithmetic types :-/ */
1846 case BRIG_OPCODE_AND:
1847 case BRIG_OPCODE_OR:
1848 case BRIG_OPCODE_XOR:
1849 case BRIG_OPCODE_NOT:
1850 type = regtype_for_type (insn->m_type);
1851 break;
1852 default:
1853 type = insn->m_type;
1854 break;
1856 repr.base.type = lendian16 (type);
1857 repr.base.operands = lendian32 (emit_insn_operands (insn));
1859 if (hsa_type_packed_p (type))
1861 if (hsa_type_float_p (type)
1862 && !hsa_opcode_floating_bit_insn_p (insn->m_opcode))
1863 repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
1864 else
1865 repr.round = 0;
1866 /* We assume that destination and sources agree in packing layout. */
1867 if (insn->num_used_ops () >= 2)
1868 repr.pack = BRIG_PACK_PP;
1869 else
1870 repr.pack = BRIG_PACK_P;
1871 repr.reserved = 0;
1872 repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod));
1873 repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD);
1874 brig_code.add (&repr, sizeof (struct BrigInstMod));
1876 else
1877 brig_code.add (&repr, sizeof (struct BrigInstBasic));
1878 brig_insn_count++;
1881 /* Emit an HSA instruction and all necessary directives, schedule necessary
1882 operands for writing. */
1884 static void
1885 emit_insn (hsa_insn_basic *insn)
1887 gcc_assert (!is_a <hsa_insn_phi *> (insn));
1889 insn->m_brig_offset = brig_code.total_size;
1891 if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn))
1892 emit_signal_insn (signal);
1893 else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn))
1894 emit_atomic_insn (atom);
1895 else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
1896 emit_memory_insn (mem);
1897 else if (insn->m_opcode == BRIG_OPCODE_LDA)
1898 emit_addr_insn (insn);
1899 else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
1900 emit_segment_insn (seg);
1901 else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
1902 emit_cmp_insn (cmp);
1903 else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn))
1904 emit_cond_branch_insn (br);
1905 else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
1907 if (switch_instructions == NULL)
1908 switch_instructions = new vec <hsa_insn_sbr *> ();
1910 switch_instructions->safe_push (sbr);
1911 emit_switch_insn (sbr);
1913 else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
1914 emit_generic_branch_insn (br);
1915 else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
1916 emit_arg_block_insn (block);
1917 else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
1918 emit_call_insn (call);
1919 else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
1920 emit_comment_insn (comment);
1921 else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn))
1922 emit_queue_insn (queue);
1923 else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn))
1924 emit_srctype_insn (srctype);
1925 else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn))
1926 emit_packed_insn (packed);
1927 else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn))
1928 emit_cvt_insn (cvt);
1929 else if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn))
1930 emit_alloca_insn (alloca);
1931 else
1932 emit_basic_insn (insn);
1935 /* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
1936 or we are about to finish emitting code, if it is NULL. If the fall through
1937 edge from BB does not lead to NEXT_BB, emit an unconditional jump. */
1939 static void
1940 perhaps_emit_branch (basic_block bb, basic_block next_bb)
1942 basic_block t_bb = NULL, ff = NULL;
1944 edge_iterator ei;
1945 edge e;
1947 /* If the last instruction of BB is a switch, ignore emission of all
1948 edges. */
1949 if (hsa_bb_for_bb (bb)->m_last_insn
1950 && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn))
1951 return;
1953 FOR_EACH_EDGE (e, ei, bb->succs)
1954 if (e->flags & EDGE_TRUE_VALUE)
1956 gcc_assert (!t_bb);
1957 t_bb = e->dest;
1959 else
1961 gcc_assert (!ff);
1962 ff = e->dest;
1965 if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun))
1966 return;
1968 emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref);
1971 /* Emit the a function with name NAME to the various brig sections. */
1973 void
1974 hsa_brig_emit_function (void)
1976 basic_block bb, prev_bb;
1977 hsa_insn_basic *insn;
1978 BrigDirectiveExecutable *ptr_to_fndir;
1980 brig_init ();
1982 brig_insn_count = 0;
1983 memset (&op_queue, 0, sizeof (op_queue));
1984 op_queue.projected_size = brig_operand.total_size;
1986 if (!function_offsets)
1987 function_offsets = new hash_map<tree, BrigCodeOffset32_t> ();
1989 if (!emitted_declarations)
1990 emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> ();
1992 for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++)
1994 tree called = hsa_cfun->m_called_functions[i];
1996 /* If the function has no definition, emit a declaration. */
1997 if (!emitted_declarations->get (called))
1999 BrigDirectiveExecutable *e = emit_function_declaration (called);
2000 emitted_declarations->put (called, e);
2004 for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++)
2006 hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i];
2007 emit_internal_fn_decl (called);
2010 ptr_to_fndir = emit_function_directives (hsa_cfun, false);
2011 for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn;
2012 insn;
2013 insn = insn->m_next)
2014 emit_insn (insn);
2015 prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
2016 FOR_EACH_BB_FN (bb, cfun)
2018 perhaps_emit_branch (prev_bb, bb);
2019 emit_bb_label_directive (hsa_bb_for_bb (bb));
2020 for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next)
2021 emit_insn (insn);
2022 prev_bb = bb;
2024 perhaps_emit_branch (prev_bb, NULL);
2025 ptr_to_fndir->nextModuleEntry = lendian32 (brig_code.total_size);
2027 /* Fill up label references for all sbr instructions. */
2028 if (switch_instructions)
2030 for (unsigned i = 0; i < switch_instructions->length (); i++)
2032 hsa_insn_sbr *sbr = (*switch_instructions)[i];
2033 for (unsigned j = 0; j < sbr->m_jump_table.length (); j++)
2035 hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]);
2036 sbr->m_label_code_list->m_offsets[j]
2037 = hbb->m_label_ref.m_directive_offset;
2041 switch_instructions->release ();
2042 delete switch_instructions;
2043 switch_instructions = NULL;
2046 if (dump_file)
2048 fprintf (dump_file, "------- After BRIG emission: -------\n");
2049 dump_hsa_cfun (dump_file);
2052 emit_queued_operands ();
2055 /* Emit all OMP symbols related to OMP. */
2057 void
2058 hsa_brig_emit_omp_symbols (void)
2060 brig_init ();
2061 emit_directive_variable (hsa_num_threads);
2064 /* Create and return __hsa_global_variables symbol that contains
2065 all informations consumed by libgomp to link global variables
2066 with their string names used by an HSA kernel. */
2068 static tree
2069 hsa_output_global_variables ()
2071 unsigned l = hsa_global_variable_symbols->elements ();
2073 tree variable_info_type = make_node (RECORD_TYPE);
2074 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2075 get_identifier ("name"), ptr_type_node);
2076 DECL_CHAIN (id_f1) = NULL_TREE;
2077 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2078 get_identifier ("omp_data_size"),
2079 ptr_type_node);
2080 DECL_CHAIN (id_f2) = id_f1;
2081 finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2,
2082 NULL_TREE);
2084 tree int_num_of_global_vars;
2085 int_num_of_global_vars = build_int_cst (uint32_type_node, l);
2086 tree global_vars_num_index_type = build_index_type (int_num_of_global_vars);
2087 tree global_vars_array_type = build_array_type (variable_info_type,
2088 global_vars_num_index_type);
2089 TYPE_ARTIFICIAL (global_vars_array_type) = 1;
2091 vec<constructor_elt, va_gc> *global_vars_vec = NULL;
2093 for (hash_table <hsa_noop_symbol_hasher>::iterator it
2094 = hsa_global_variable_symbols->begin ();
2095 it != hsa_global_variable_symbols->end (); ++it)
2097 unsigned len = strlen ((*it)->m_name);
2098 char *copy = XNEWVEC (char, len + 2);
2099 copy[0] = '&';
2100 memcpy (copy + 1, (*it)->m_name, len);
2101 copy[len + 1] = '\0';
2102 len++;
2103 hsa_sanitize_name (copy);
2105 tree var_name = build_string (len, copy);
2106 TREE_TYPE (var_name)
2107 = build_array_type (char_type_node, build_index_type (size_int (len)));
2108 free (copy);
2110 vec<constructor_elt, va_gc> *variable_info_vec = NULL;
2111 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2112 build1 (ADDR_EXPR,
2113 build_pointer_type (TREE_TYPE (var_name)),
2114 var_name));
2115 CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
2116 build_fold_addr_expr ((*it)->m_decl));
2118 tree variable_info_ctor = build_constructor (variable_info_type,
2119 variable_info_vec);
2121 CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE,
2122 variable_info_ctor);
2125 tree global_vars_ctor = build_constructor (global_vars_array_type,
2126 global_vars_vec);
2128 char tmp_name[64];
2129 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1);
2130 tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2131 get_identifier (tmp_name),
2132 global_vars_array_type);
2133 TREE_STATIC (global_vars_table) = 1;
2134 TREE_READONLY (global_vars_table) = 1;
2135 TREE_PUBLIC (global_vars_table) = 0;
2136 DECL_ARTIFICIAL (global_vars_table) = 1;
2137 DECL_IGNORED_P (global_vars_table) = 1;
2138 DECL_EXTERNAL (global_vars_table) = 0;
2139 TREE_CONSTANT (global_vars_table) = 1;
2140 DECL_INITIAL (global_vars_table) = global_vars_ctor;
2141 varpool_node::finalize_decl (global_vars_table);
2143 return global_vars_table;
2146 /* Create __hsa_host_functions and __hsa_kernels that contain
2147 all informations consumed by libgomp to register all kernels
2148 in the BRIG binary. */
2150 static void
2151 hsa_output_kernels (tree *host_func_table, tree *kernels)
2153 unsigned map_count = hsa_get_number_decl_kernel_mappings ();
2155 tree int_num_of_kernels;
2156 int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
2157 tree kernel_num_index_type = build_index_type (int_num_of_kernels);
2158 tree host_functions_array_type = build_array_type (ptr_type_node,
2159 kernel_num_index_type);
2160 TYPE_ARTIFICIAL (host_functions_array_type) = 1;
2162 vec<constructor_elt, va_gc> *host_functions_vec = NULL;
2163 for (unsigned i = 0; i < map_count; ++i)
2165 tree decl = hsa_get_decl_kernel_mapping_decl (i);
2166 tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl));
2167 CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
2169 tree host_functions_ctor = build_constructor (host_functions_array_type,
2170 host_functions_vec);
2171 char tmp_name[64];
2172 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
2173 tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2174 get_identifier (tmp_name),
2175 host_functions_array_type);
2176 TREE_STATIC (hsa_host_func_table) = 1;
2177 TREE_READONLY (hsa_host_func_table) = 1;
2178 TREE_PUBLIC (hsa_host_func_table) = 0;
2179 DECL_ARTIFICIAL (hsa_host_func_table) = 1;
2180 DECL_IGNORED_P (hsa_host_func_table) = 1;
2181 DECL_EXTERNAL (hsa_host_func_table) = 0;
2182 TREE_CONSTANT (hsa_host_func_table) = 1;
2183 DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
2184 varpool_node::finalize_decl (hsa_host_func_table);
2185 *host_func_table = hsa_host_func_table;
2187 /* Following code emits list of kernel_info structures. */
2189 tree kernel_info_type = make_node (RECORD_TYPE);
2190 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2191 get_identifier ("name"), ptr_type_node);
2192 DECL_CHAIN (id_f1) = NULL_TREE;
2193 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2194 get_identifier ("omp_data_size"),
2195 unsigned_type_node);
2196 DECL_CHAIN (id_f2) = id_f1;
2197 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2198 get_identifier ("gridified_kernel_p"),
2199 boolean_type_node);
2200 DECL_CHAIN (id_f3) = id_f2;
2201 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2202 get_identifier ("kernel_dependencies_count"),
2203 unsigned_type_node);
2204 DECL_CHAIN (id_f4) = id_f3;
2205 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2206 get_identifier ("kernel_dependencies"),
2207 build_pointer_type (build_pointer_type
2208 (char_type_node)));
2209 DECL_CHAIN (id_f5) = id_f4;
2210 finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5,
2211 NULL_TREE);
2213 int_num_of_kernels = build_int_cstu (uint32_type_node, map_count);
2214 tree kernel_info_vector_type
2215 = build_array_type (kernel_info_type,
2216 build_index_type (int_num_of_kernels));
2217 TYPE_ARTIFICIAL (kernel_info_vector_type) = 1;
2219 vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL;
2220 tree kernel_dependencies_vector_type = NULL;
2222 for (unsigned i = 0; i < map_count; ++i)
2224 tree kernel = hsa_get_decl_kernel_mapping_decl (i);
2225 char *name = hsa_get_decl_kernel_mapping_name (i);
2226 unsigned len = strlen (name);
2227 char *copy = XNEWVEC (char, len + 2);
2228 copy[0] = '&';
2229 memcpy (copy + 1, name, len);
2230 copy[len + 1] = '\0';
2231 len++;
2233 tree kern_name = build_string (len, copy);
2234 TREE_TYPE (kern_name)
2235 = build_array_type (char_type_node, build_index_type (size_int (len)));
2236 free (copy);
2238 unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i);
2239 tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size);
2240 bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i);
2241 tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
2242 gridified_kernel_p);
2243 unsigned count = 0;
2244 vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
2245 if (hsa_decl_kernel_dependencies)
2247 vec<const char *> **slot;
2248 slot = hsa_decl_kernel_dependencies->get (kernel);
2249 if (slot)
2251 vec <const char *> *dependencies = *slot;
2252 count = dependencies->length ();
2254 kernel_dependencies_vector_type
2255 = build_array_type (build_pointer_type (char_type_node),
2256 build_index_type (size_int (count)));
2257 TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1;
2259 for (unsigned j = 0; j < count; j++)
2261 const char *d = (*dependencies)[j];
2262 len = strlen (d);
2263 tree dependency_name = build_string (len, d);
2264 TREE_TYPE (dependency_name)
2265 = build_array_type (char_type_node,
2266 build_index_type (size_int (len)));
2268 CONSTRUCTOR_APPEND_ELT
2269 (kernel_dependencies_vec, NULL_TREE,
2270 build1 (ADDR_EXPR,
2271 build_pointer_type (TREE_TYPE (dependency_name)),
2272 dependency_name));
2277 tree dependencies_count = build_int_cstu (unsigned_type_node, count);
2279 vec<constructor_elt, va_gc> *kernel_info_vec = NULL;
2280 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2281 build1 (ADDR_EXPR,
2282 build_pointer_type (TREE_TYPE
2283 (kern_name)),
2284 kern_name));
2285 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size);
2286 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2287 gridified_kernel_p_tree);
2288 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count);
2290 if (count > 0)
2292 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
2293 gcc_checking_assert (kernel_dependencies_vector_type);
2294 tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2295 get_identifier (tmp_name),
2296 kernel_dependencies_vector_type);
2298 TREE_STATIC (dependencies_list) = 1;
2299 TREE_READONLY (dependencies_list) = 1;
2300 TREE_PUBLIC (dependencies_list) = 0;
2301 DECL_ARTIFICIAL (dependencies_list) = 1;
2302 DECL_IGNORED_P (dependencies_list) = 1;
2303 DECL_EXTERNAL (dependencies_list) = 0;
2304 TREE_CONSTANT (dependencies_list) = 1;
2305 DECL_INITIAL (dependencies_list)
2306 = build_constructor (kernel_dependencies_vector_type,
2307 kernel_dependencies_vec);
2308 varpool_node::finalize_decl (dependencies_list);
2310 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
2311 build1 (ADDR_EXPR,
2312 build_pointer_type
2313 (TREE_TYPE (dependencies_list)),
2314 dependencies_list));
2316 else
2317 CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node);
2319 tree kernel_info_ctor = build_constructor (kernel_info_type,
2320 kernel_info_vec);
2322 CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE,
2323 kernel_info_ctor);
2326 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1);
2327 tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2328 get_identifier (tmp_name),
2329 kernel_info_vector_type);
2331 TREE_STATIC (hsa_kernels) = 1;
2332 TREE_READONLY (hsa_kernels) = 1;
2333 TREE_PUBLIC (hsa_kernels) = 0;
2334 DECL_ARTIFICIAL (hsa_kernels) = 1;
2335 DECL_IGNORED_P (hsa_kernels) = 1;
2336 DECL_EXTERNAL (hsa_kernels) = 0;
2337 TREE_CONSTANT (hsa_kernels) = 1;
2338 DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type,
2339 kernel_info_vector_vec);
2340 varpool_node::finalize_decl (hsa_kernels);
2341 *kernels = hsa_kernels;
2344 /* Create a static constructor that will register out brig stuff with
2345 libgomp. */
2347 static void
2348 hsa_output_libgomp_mapping (tree brig_decl)
2350 unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
2351 unsigned global_variable_count = hsa_global_variable_symbols->elements ();
2353 tree kernels;
2354 tree host_func_table;
2356 hsa_output_kernels (&host_func_table, &kernels);
2357 tree global_vars = hsa_output_global_variables ();
2359 tree hsa_image_desc_type = make_node (RECORD_TYPE);
2360 tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2361 get_identifier ("brig_module"), ptr_type_node);
2362 DECL_CHAIN (id_f1) = NULL_TREE;
2363 tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2364 get_identifier ("kernel_count"),
2365 unsigned_type_node);
2367 DECL_CHAIN (id_f2) = id_f1;
2368 tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2369 get_identifier ("hsa_kernel_infos"),
2370 ptr_type_node);
2371 DECL_CHAIN (id_f3) = id_f2;
2372 tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2373 get_identifier ("global_variable_count"),
2374 unsigned_type_node);
2375 DECL_CHAIN (id_f4) = id_f3;
2376 tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2377 get_identifier ("hsa_global_variable_infos"),
2378 ptr_type_node);
2379 DECL_CHAIN (id_f5) = id_f4;
2380 finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5,
2381 NULL_TREE);
2382 TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;
2384 vec<constructor_elt, va_gc> *img_desc_vec = NULL;
2385 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2386 build_fold_addr_expr (brig_decl));
2387 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2388 build_int_cstu (unsigned_type_node, kernel_count));
2389 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2390 build1 (ADDR_EXPR,
2391 build_pointer_type (TREE_TYPE (kernels)),
2392 kernels));
2393 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2394 build_int_cstu (unsigned_type_node,
2395 global_variable_count));
2396 CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
2397 build1 (ADDR_EXPR,
2398 build_pointer_type (TREE_TYPE (global_vars)),
2399 global_vars));
2401 tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
2403 char tmp_name[64];
2404 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1);
2405 tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2406 get_identifier (tmp_name),
2407 hsa_image_desc_type);
2408 TREE_STATIC (hsa_img_descriptor) = 1;
2409 TREE_READONLY (hsa_img_descriptor) = 1;
2410 TREE_PUBLIC (hsa_img_descriptor) = 0;
2411 DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
2412 DECL_IGNORED_P (hsa_img_descriptor) = 1;
2413 DECL_EXTERNAL (hsa_img_descriptor) = 0;
2414 TREE_CONSTANT (hsa_img_descriptor) = 1;
2415 DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
2416 varpool_node::finalize_decl (hsa_img_descriptor);
2418 /* Construct the "host_table" libgomp expects. */
2419 tree index_type = build_index_type (build_int_cst (integer_type_node, 4));
2420 tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type);
2421 TYPE_ARTIFICIAL (libgomp_host_table_type) = 1;
2422 vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
2423 tree host_func_table_addr = build_fold_addr_expr (host_func_table);
2424 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2425 host_func_table_addr);
2426 offset_int func_table_size
2427 = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
2428 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
2429 fold_build2 (POINTER_PLUS_EXPR,
2430 TREE_TYPE (host_func_table_addr),
2431 host_func_table_addr,
2432 build_int_cst (size_type_node,
2433 func_table_size.to_uhwi
2434 ())));
2435 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2436 CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
2437 tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
2438 libgomp_host_table_vec);
2439 ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1);
2440 tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
2441 get_identifier (tmp_name),
2442 libgomp_host_table_type);
2444 TREE_STATIC (hsa_libgomp_host_table) = 1;
2445 TREE_READONLY (hsa_libgomp_host_table) = 1;
2446 TREE_PUBLIC (hsa_libgomp_host_table) = 0;
2447 DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
2448 DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
2449 DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
2450 TREE_CONSTANT (hsa_libgomp_host_table) = 1;
2451 DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
2452 varpool_node::finalize_decl (hsa_libgomp_host_table);
2454 /* Generate an initializer with a call to the registration routine. */
2456 tree offload_register
2457 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER);
2458 gcc_checking_assert (offload_register);
2460 tree *hsa_ctor_stmts = hsa_get_ctor_statements ();
2461 append_to_statement_list
2462 (build_call_expr (offload_register, 4,
2463 build_int_cstu (unsigned_type_node,
2464 GOMP_VERSION_PACK (GOMP_VERSION,
2465 GOMP_VERSION_HSA)),
2466 build_fold_addr_expr (hsa_libgomp_host_table),
2467 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2468 build_fold_addr_expr (hsa_img_descriptor)),
2469 hsa_ctor_stmts);
2471 cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY);
2473 tree offload_unregister
2474 = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER);
2475 gcc_checking_assert (offload_unregister);
2477 tree *hsa_dtor_stmts = hsa_get_dtor_statements ();
2478 append_to_statement_list
2479 (build_call_expr (offload_unregister, 4,
2480 build_int_cstu (unsigned_type_node,
2481 GOMP_VERSION_PACK (GOMP_VERSION,
2482 GOMP_VERSION_HSA)),
2483 build_fold_addr_expr (hsa_libgomp_host_table),
2484 build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
2485 build_fold_addr_expr (hsa_img_descriptor)),
2486 hsa_dtor_stmts);
2487 cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, DEFAULT_INIT_PRIORITY);
2490 /* Emit the brig module we have compiled to a section in the final assembly and
2491 also create a compile unit static constructor that will register the brig
2492 module with libgomp. */
2494 void
2495 hsa_output_brig (void)
2497 section *saved_section;
2499 if (!brig_initialized)
2500 return;
2502 for (unsigned i = 0; i < function_call_linkage.length (); i++)
2504 function_linkage_pair p = function_call_linkage[i];
2506 BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl);
2507 gcc_assert (*func_offset);
2508 BrigOperandCodeRef *code_ref
2509 = (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset));
2510 gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF);
2511 code_ref->ref = lendian32 (*func_offset);
2514 /* Iterate all function declarations and if we meet a function that should
2515 have module linkage and we are unable to emit HSAIL for the function,
2516 then change the linkage to program linkage. Doing so, we will emit
2517 a valid BRIG image. */
2518 if (hsa_failed_functions != NULL && emitted_declarations != NULL)
2519 for (hash_map <tree, BrigDirectiveExecutable *>::iterator it
2520 = emitted_declarations->begin ();
2521 it != emitted_declarations->end ();
2522 ++it)
2524 if (hsa_failed_functions->contains ((*it).first))
2525 (*it).second->linkage = BRIG_LINKAGE_PROGRAM;
2528 saved_section = in_section;
2530 switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
2531 char tmp_name[64];
2532 ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
2533 ASM_OUTPUT_LABEL (asm_out_file, tmp_name);
2534 tree brig_id = get_identifier (tmp_name);
2535 tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
2536 char_type_node);
2537 SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
2538 TREE_ADDRESSABLE (brig_decl) = 1;
2539 TREE_READONLY (brig_decl) = 1;
2540 DECL_ARTIFICIAL (brig_decl) = 1;
2541 DECL_IGNORED_P (brig_decl) = 1;
2542 TREE_STATIC (brig_decl) = 1;
2543 TREE_PUBLIC (brig_decl) = 0;
2544 TREE_USED (brig_decl) = 1;
2545 DECL_INITIAL (brig_decl) = brig_decl;
2546 TREE_ASM_WRITTEN (brig_decl) = 1;
2548 BrigModuleHeader module_header;
2549 memcpy (&module_header.identification, "HSA BRIG",
2550 sizeof (module_header.identification));
2551 module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR);
2552 module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR);
2553 uint64_t section_index[3];
2555 int data_padding, code_padding, operand_padding;
2556 data_padding = HSA_SECTION_ALIGNMENT
2557 - brig_data.total_size % HSA_SECTION_ALIGNMENT;
2558 code_padding = HSA_SECTION_ALIGNMENT
2559 - brig_code.total_size % HSA_SECTION_ALIGNMENT;
2560 operand_padding = HSA_SECTION_ALIGNMENT
2561 - brig_operand.total_size % HSA_SECTION_ALIGNMENT;
2563 uint64_t module_size = sizeof (module_header)
2564 + sizeof (section_index)
2565 + brig_data.total_size
2566 + data_padding
2567 + brig_code.total_size
2568 + code_padding
2569 + brig_operand.total_size
2570 + operand_padding;
2571 gcc_assert ((module_size % 16) == 0);
2572 module_header.byteCount = lendian64 (module_size);
2573 memset (&module_header.hash, 0, sizeof (module_header.hash));
2574 module_header.reserved = 0;
2575 module_header.sectionCount = lendian32 (3);
2576 module_header.sectionIndex = lendian64 (sizeof (module_header));
2577 assemble_string ((const char *) &module_header, sizeof (module_header));
2578 uint64_t off = sizeof (module_header) + sizeof (section_index);
2579 section_index[0] = lendian64 (off);
2580 off += brig_data.total_size + data_padding;
2581 section_index[1] = lendian64 (off);
2582 off += brig_code.total_size + code_padding;
2583 section_index[2] = lendian64 (off);
2584 assemble_string ((const char *) &section_index, sizeof (section_index));
2586 char padding[HSA_SECTION_ALIGNMENT];
2587 memset (padding, 0, sizeof (padding));
2589 brig_data.output ();
2590 assemble_string (padding, data_padding);
2591 brig_code.output ();
2592 assemble_string (padding, code_padding);
2593 brig_operand.output ();
2594 assemble_string (padding, operand_padding);
2596 if (saved_section)
2597 switch_to_section (saved_section);
2599 hsa_output_libgomp_mapping (brig_decl);
2601 hsa_free_decl_kernel_mapping ();
2602 brig_release_data ();
2603 hsa_deinit_compilation_unit_data ();
2605 delete emitted_declarations;
2606 emitted_declarations = NULL;
2607 delete function_offsets;
2608 function_offsets = NULL;