Record configure regenerate
[official-gcc.git] / libgomp / plugin / plugin-hsa.c
blobd88849338dca773ad7672c1b68a486ca2273f114
1 /* Plugin for HSAIL execution.
3 Copyright (C) 2013-2016 Free Software Foundation, Inc.
5 Contributed by Martin Jambor <mjambor@suse.cz> and
6 Martin Liska <mliska@suse.cz>.
8 This file is part of the GNU Offloading and Multi Processing Library
9 (libgomp).
11 Libgomp is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 3, or (at your option)
14 any later version.
16 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
17 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 more details.
21 Under Section 7 of GPL version 3, you are granted additional
22 permissions described in the GCC Runtime Library Exception, version
23 3.1, as published by the Free Software Foundation.
25 You should have received a copy of the GNU General Public License and
26 a copy of the GCC Runtime Library Exception along with this program;
27 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
28 <http://www.gnu.org/licenses/>. */
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <pthread.h>
34 #include <hsa.h>
35 #include <hsa_ext_finalize.h>
36 #include <dlfcn.h>
37 #include "libgomp-plugin.h"
38 #include "gomp-constants.h"
40 /* Keep the following GOMP prefixed structures in sync with respective parts of
41 the compiler. */
43 /* Structure describing the run-time and grid properties of an HSA kernel
44 lauch. */
46 struct GOMP_kernel_launch_attributes
48 /* Number of dimensions the workload has. Maximum number is 3. */
49 uint32_t ndim;
50 /* Size of the grid in the three respective dimensions. */
51 uint32_t gdims[3];
52 /* Size of work-groups in the respective dimensions. */
53 uint32_t wdims[3];
56 /* Collection of information needed for a dispatch of a kernel from a
57 kernel. */
59 struct GOMP_hsa_kernel_dispatch
61 /* Pointer to a command queue associated with a kernel dispatch agent. */
62 void *queue;
63 /* Pointer to reserved memory for OMP data struct copying. */
64 void *omp_data_memory;
65 /* Pointer to a memory space used for kernel arguments passing. */
66 void *kernarg_address;
67 /* Kernel object. */
68 uint64_t object;
69 /* Synchronization signal used for dispatch synchronization. */
70 uint64_t signal;
71 /* Private segment size. */
72 uint32_t private_segment_size;
73 /* Group segment size. */
74 uint32_t group_segment_size;
75 /* Number of children kernel dispatches. */
76 uint64_t kernel_dispatch_count;
77 /* Debug purpose argument. */
78 uint64_t debug;
79 /* Levels-var ICV. */
80 uint64_t omp_level;
81 /* Kernel dispatch structures created for children kernel dispatches. */
82 struct GOMP_hsa_kernel_dispatch **children_dispatches;
83 /* Number of threads. */
84 uint32_t omp_num_threads;
87 /* Part of the libgomp plugin interface. Return the name of the accelerator,
88 which is "hsa". */
90 const char *
91 GOMP_OFFLOAD_get_name (void)
93 return "hsa";
96 /* Part of the libgomp plugin interface. Return the specific capabilities the
97 HSA accelerator have. */
99 unsigned int
100 GOMP_OFFLOAD_get_caps (void)
102 return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400;
105 /* Part of the libgomp plugin interface. Identify as HSA accelerator. */
108 GOMP_OFFLOAD_get_type (void)
110 return OFFLOAD_TARGET_TYPE_HSA;
113 /* Return the libgomp version number we're compatible with. There is
114 no requirement for cross-version compatibility. */
116 unsigned
117 GOMP_OFFLOAD_version (void)
119 return GOMP_VERSION;
122 /* Flag to decide whether print to stderr information about what is going on.
123 Set in init_debug depending on environment variables. */
125 static bool debug;
127 /* Flag to decide if the runtime should suppress a possible fallback to host
128 execution. */
130 static bool suppress_host_fallback;
132 /* Initialize debug and suppress_host_fallback according to the environment. */
134 static void
135 init_enviroment_variables (void)
137 if (getenv ("HSA_DEBUG"))
138 debug = true;
139 else
140 debug = false;
142 if (getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
143 suppress_host_fallback = true;
144 else
145 suppress_host_fallback = false;
148 /* Print a logging message with PREFIX to stderr if HSA_DEBUG value
149 is set to true. */
151 #define HSA_LOG(prefix, ...) \
152 do \
154 if (debug) \
156 fprintf (stderr, prefix); \
157 fprintf (stderr, __VA_ARGS__); \
160 while (false);
162 /* Print a debugging message to stderr. */
164 #define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__)
166 /* Print a warning message to stderr. */
168 #define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__)
170 /* Print HSA warning STR with an HSA STATUS code. */
172 static void
173 hsa_warn (const char *str, hsa_status_t status)
175 if (!debug)
176 return;
178 const char *hsa_error;
179 hsa_status_string (status, &hsa_error);
181 fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error);
184 /* Report a fatal error STR together with the HSA error corresponding to STATUS
185 and terminate execution of the current process. */
187 static void
188 hsa_fatal (const char *str, hsa_status_t status)
190 const char *hsa_error;
191 hsa_status_string (status, &hsa_error);
192 GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
193 hsa_error);
196 struct hsa_kernel_description
198 const char *name;
199 unsigned omp_data_size;
200 bool gridified_kernel_p;
201 unsigned kernel_dependencies_count;
202 const char **kernel_dependencies;
205 struct global_var_info
207 const char *name;
208 void *address;
211 /* Data passed by the static initializer of a compilation unit containing BRIG
212 to GOMP_offload_register. */
214 struct brig_image_desc
216 hsa_ext_module_t brig_module;
217 const unsigned kernel_count;
218 struct hsa_kernel_description *kernel_infos;
219 const unsigned global_variable_count;
220 struct global_var_info *global_variables;
223 struct agent_info;
225 /* Information required to identify, finalize and run any given kernel. */
227 struct kernel_info
229 /* Name of the kernel, required to locate it within the brig module. */
230 const char *name;
231 /* Size of memory space for OMP data. */
232 unsigned omp_data_size;
233 /* The specific agent the kernel has been or will be finalized for and run
234 on. */
235 struct agent_info *agent;
236 /* The specific module where the kernel takes place. */
237 struct module_info *module;
238 /* Mutex enforcing that at most once thread ever initializes a kernel for
239 use. A thread should have locked agent->modules_rwlock for reading before
240 acquiring it. */
241 pthread_mutex_t init_mutex;
242 /* Flag indicating whether the kernel has been initialized and all fields
243 below it contain valid data. */
244 bool initialized;
245 /* Flag indicating that the kernel has a problem that blocks an execution. */
246 bool initialization_failed;
247 /* The object to be put into the dispatch queue. */
248 uint64_t object;
249 /* Required size of kernel arguments. */
250 uint32_t kernarg_segment_size;
251 /* Required size of group segment. */
252 uint32_t group_segment_size;
253 /* Required size of private segment. */
254 uint32_t private_segment_size;
255 /* List of all kernel dependencies. */
256 const char **dependencies;
257 /* Number of dependencies. */
258 unsigned dependencies_count;
259 /* Maximum OMP data size necessary for kernel from kernel dispatches. */
260 unsigned max_omp_data_size;
261 /* True if the kernel is gridified. */
262 bool gridified_kernel_p;
265 /* Information about a particular brig module, its image and kernels. */
267 struct module_info
269 /* The next and previous module in the linked list of modules of an agent. */
270 struct module_info *next, *prev;
271 /* The description with which the program has registered the image. */
272 struct brig_image_desc *image_desc;
274 /* Number of kernels in this module. */
275 int kernel_count;
276 /* An array of kernel_info structures describing each kernel in this
277 module. */
278 struct kernel_info kernels[];
281 /* Information about shared brig library. */
283 struct brig_library_info
285 char *file_name;
286 hsa_ext_module_t image;
289 /* Description of an HSA GPU agent and the program associated with it. */
291 struct agent_info
293 /* The HSA ID of the agent. Assigned when hsa_context is initialized. */
294 hsa_agent_t id;
295 /* Whether the agent has been initialized. The fields below are usable only
296 if it has been. */
297 bool initialized;
298 /* The HSA ISA of this agent. */
299 hsa_isa_t isa;
300 /* Command queue of the agent. */
301 hsa_queue_t *command_q;
302 /* Kernel from kernel dispatch command queue. */
303 hsa_queue_t *kernel_dispatch_command_q;
304 /* The HSA memory region from which to allocate kernel arguments. */
305 hsa_region_t kernarg_region;
307 /* Read-write lock that protects kernels which are running or about to be run
308 from interference with loading and unloading of images. Needs to be
309 locked for reading while a kernel is being run, and for writing if the
310 list of modules is manipulated (and thus the HSA program invalidated). */
311 pthread_rwlock_t modules_rwlock;
312 /* The first module in a linked list of modules associated with this
313 kernel. */
314 struct module_info *first_module;
316 /* Mutex enforcing that only one thread will finalize the HSA program. A
317 thread should have locked agent->modules_rwlock for reading before
318 acquiring it. */
319 pthread_mutex_t prog_mutex;
320 /* Flag whether the HSA program that consists of all the modules has been
321 finalized. */
322 bool prog_finalized;
323 /* Flag whether the program was finalized but with a failure. */
324 bool prog_finalized_error;
325 /* HSA executable - the finalized program that is used to locate kernels. */
326 hsa_executable_t executable;
327 /* List of BRIG libraries. */
328 struct brig_library_info **brig_libraries;
329 /* Number of loaded shared BRIG libraries. */
330 unsigned brig_libraries_count;
333 /* Information about the whole HSA environment and all of its agents. */
335 struct hsa_context_info
337 /* Whether the structure has been initialized. */
338 bool initialized;
339 /* Number of usable GPU HSA agents in the system. */
340 int agent_count;
341 /* Array of agent_info structures describing the individual HSA agents. */
342 struct agent_info *agents;
345 /* Information about the whole HSA environment and all of its agents. */
347 static struct hsa_context_info hsa_context;
349 /* Find kernel for an AGENT by name provided in KERNEL_NAME. */
351 static struct kernel_info *
352 get_kernel_for_agent (struct agent_info *agent, const char *kernel_name)
354 struct module_info *module = agent->first_module;
356 while (module)
358 for (unsigned i = 0; i < module->kernel_count; i++)
359 if (strcmp (module->kernels[i].name, kernel_name) == 0)
360 return &module->kernels[i];
362 module = module->next;
365 return NULL;
368 /* Return true if the agent is a GPU and acceptable of concurrent submissions
369 from different threads. */
371 static bool
372 suitable_hsa_agent_p (hsa_agent_t agent)
374 hsa_device_type_t device_type;
375 hsa_status_t status
376 = hsa_agent_get_info (agent, HSA_AGENT_INFO_DEVICE, &device_type);
377 if (status != HSA_STATUS_SUCCESS || device_type != HSA_DEVICE_TYPE_GPU)
378 return false;
380 uint32_t features = 0;
381 status = hsa_agent_get_info (agent, HSA_AGENT_INFO_FEATURE, &features);
382 if (status != HSA_STATUS_SUCCESS
383 || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH))
384 return false;
385 hsa_queue_type_t queue_type;
386 status = hsa_agent_get_info (agent, HSA_AGENT_INFO_QUEUE_TYPE, &queue_type);
387 if (status != HSA_STATUS_SUCCESS
388 || (queue_type != HSA_QUEUE_TYPE_MULTI))
389 return false;
391 return true;
394 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment
395 agent_count in hsa_context. */
397 static hsa_status_t
398 count_gpu_agents (hsa_agent_t agent, void *data __attribute__ ((unused)))
400 if (suitable_hsa_agent_p (agent))
401 hsa_context.agent_count++;
402 return HSA_STATUS_SUCCESS;
405 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent
406 id to the describing structure in the hsa context. The index of the
407 structure is pointed to by DATA, increment it afterwards. */
409 static hsa_status_t
410 assign_agent_ids (hsa_agent_t agent, void *data)
412 if (suitable_hsa_agent_p (agent))
414 int *agent_index = (int *) data;
415 hsa_context.agents[*agent_index].id = agent;
416 ++*agent_index;
418 return HSA_STATUS_SUCCESS;
421 /* Initialize hsa_context if it has not already been done. */
423 static void
424 init_hsa_context (void)
426 hsa_status_t status;
427 int agent_index = 0;
429 if (hsa_context.initialized)
430 return;
431 init_enviroment_variables ();
432 status = hsa_init ();
433 if (status != HSA_STATUS_SUCCESS)
434 hsa_fatal ("Run-time could not be initialized", status);
435 HSA_DEBUG ("HSA run-time initialized\n");
436 status = hsa_iterate_agents (count_gpu_agents, NULL);
437 if (status != HSA_STATUS_SUCCESS)
438 hsa_fatal ("HSA GPU devices could not be enumerated", status);
439 HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
441 hsa_context.agents
442 = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
443 * sizeof (struct agent_info));
444 status = hsa_iterate_agents (assign_agent_ids, &agent_index);
445 if (agent_index != hsa_context.agent_count)
446 GOMP_PLUGIN_fatal ("Failed to assign IDs to all HSA agents");
447 hsa_context.initialized = true;
450 /* Callback of dispatch queues to report errors. */
452 static void
453 queue_callback (hsa_status_t status,
454 hsa_queue_t *queue __attribute__ ((unused)),
455 void *data __attribute__ ((unused)))
457 hsa_fatal ("Asynchronous queue error", status);
460 /* Callback of hsa_agent_iterate_regions. Determine if a memory REGION can be
461 used for kernarg allocations and if so write it to the memory pointed to by
462 DATA and break the query. */
464 static hsa_status_t
465 get_kernarg_memory_region (hsa_region_t region, void *data)
467 hsa_status_t status;
468 hsa_region_segment_t segment;
470 status = hsa_region_get_info (region, HSA_REGION_INFO_SEGMENT, &segment);
471 if (status != HSA_STATUS_SUCCESS)
472 return status;
473 if (segment != HSA_REGION_SEGMENT_GLOBAL)
474 return HSA_STATUS_SUCCESS;
476 uint32_t flags;
477 status = hsa_region_get_info (region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
478 if (status != HSA_STATUS_SUCCESS)
479 return status;
480 if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
482 hsa_region_t *ret = (hsa_region_t *) data;
483 *ret = region;
484 return HSA_STATUS_INFO_BREAK;
486 return HSA_STATUS_SUCCESS;
489 /* Part of the libgomp plugin interface. Return the number of HSA devices on
490 the system. */
493 GOMP_OFFLOAD_get_num_devices (void)
495 init_hsa_context ();
496 return hsa_context.agent_count;
499 /* Part of the libgomp plugin interface. Initialize agent number N so that it
500 can be used for computation. */
502 void
503 GOMP_OFFLOAD_init_device (int n)
505 init_hsa_context ();
506 if (n >= hsa_context.agent_count)
507 GOMP_PLUGIN_fatal ("Request to initialize non-existing HSA device %i", n);
508 struct agent_info *agent = &hsa_context.agents[n];
510 if (agent->initialized)
511 return;
513 if (pthread_rwlock_init (&agent->modules_rwlock, NULL))
514 GOMP_PLUGIN_fatal ("Failed to initialize an HSA agent rwlock");
515 if (pthread_mutex_init (&agent->prog_mutex, NULL))
516 GOMP_PLUGIN_fatal ("Failed to initialize an HSA agent program mutex");
518 uint32_t queue_size;
519 hsa_status_t status;
520 status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
521 &queue_size);
522 if (status != HSA_STATUS_SUCCESS)
523 hsa_fatal ("Error requesting maximum queue size of the HSA agent", status);
524 status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_ISA, &agent->isa);
525 if (status != HSA_STATUS_SUCCESS)
526 hsa_fatal ("Error querying the ISA of the agent", status);
527 status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
528 queue_callback, NULL, UINT32_MAX, UINT32_MAX,
529 &agent->command_q);
530 if (status != HSA_STATUS_SUCCESS)
531 hsa_fatal ("Error creating command queue", status);
533 status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
534 queue_callback, NULL, UINT32_MAX, UINT32_MAX,
535 &agent->kernel_dispatch_command_q);
536 if (status != HSA_STATUS_SUCCESS)
537 hsa_fatal ("Error creating kernel dispatch command queue", status);
539 agent->kernarg_region.handle = (uint64_t) -1;
540 status = hsa_agent_iterate_regions (agent->id, get_kernarg_memory_region,
541 &agent->kernarg_region);
542 if (agent->kernarg_region.handle == (uint64_t) -1)
543 GOMP_PLUGIN_fatal ("Could not find suitable memory region for kernel "
544 "arguments");
545 HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
546 (long long unsigned) agent->command_q->id);
547 HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
548 (long long unsigned) agent->kernel_dispatch_command_q->id);
549 agent->initialized = true;
552 /* Verify that hsa_context has already been initialized and return the
553 agent_info structure describing device number N. */
555 static struct agent_info *
556 get_agent_info (int n)
558 if (!hsa_context.initialized)
559 GOMP_PLUGIN_fatal ("Attempt to use uninitialized HSA context.");
560 if (n >= hsa_context.agent_count)
561 GOMP_PLUGIN_fatal ("Request to operate on anon-existing HSA device %i", n);
562 if (!hsa_context.agents[n].initialized)
563 GOMP_PLUGIN_fatal ("Attempt to use an uninitialized HSA agent.");
564 return &hsa_context.agents[n];
567 /* Insert MODULE to the linked list of modules of AGENT. */
569 static void
570 add_module_to_agent (struct agent_info *agent, struct module_info *module)
572 if (agent->first_module)
573 agent->first_module->prev = module;
574 module->next = agent->first_module;
575 module->prev = NULL;
576 agent->first_module = module;
579 /* Remove MODULE from the linked list of modules of AGENT. */
581 static void
582 remove_module_from_agent (struct agent_info *agent, struct module_info *module)
584 if (agent->first_module == module)
585 agent->first_module = module->next;
586 if (module->prev)
587 module->prev->next = module->next;
588 if (module->next)
589 module->next->prev = module->prev;
592 /* Free the HSA program in agent and everything associated with it and set
593 agent->prog_finalized and the initialized flags of all kernels to false. */
595 static void
596 destroy_hsa_program (struct agent_info *agent)
598 if (!agent->prog_finalized || agent->prog_finalized_error)
599 return;
601 hsa_status_t status;
603 HSA_DEBUG ("Destroying the current HSA program.\n");
605 status = hsa_executable_destroy (agent->executable);
606 if (status != HSA_STATUS_SUCCESS)
607 hsa_fatal ("Could not destroy HSA executable", status);
609 struct module_info *module;
610 for (module = agent->first_module; module; module = module->next)
612 int i;
613 for (i = 0; i < module->kernel_count; i++)
614 module->kernels[i].initialized = false;
616 agent->prog_finalized = false;
619 /* Part of the libgomp plugin interface. Load BRIG module described by struct
620 brig_image_desc in TARGET_DATA and return references to kernel descriptors
621 in TARGET_TABLE. */
624 GOMP_OFFLOAD_load_image (int ord, unsigned version, void *target_data,
625 struct addr_pair **target_table)
627 if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
628 GOMP_PLUGIN_fatal ("Offload data incompatible with HSA plugin"
629 " (expected %u, received %u)",
630 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
632 struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
633 struct agent_info *agent;
634 struct addr_pair *pair;
635 struct module_info *module;
636 struct kernel_info *kernel;
637 int kernel_count = image_desc->kernel_count;
639 agent = get_agent_info (ord);
640 if (pthread_rwlock_wrlock (&agent->modules_rwlock))
641 GOMP_PLUGIN_fatal ("Unable to write-lock an HSA agent rwlock");
642 if (agent->prog_finalized)
643 destroy_hsa_program (agent);
645 HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
646 pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
647 *target_table = pair;
648 module = (struct module_info *)
649 GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info)
650 + kernel_count * sizeof (struct kernel_info));
651 module->image_desc = image_desc;
652 module->kernel_count = kernel_count;
654 kernel = &module->kernels[0];
656 /* Allocate memory for kernel dependencies. */
657 for (unsigned i = 0; i < kernel_count; i++)
659 pair->start = (uintptr_t) kernel;
660 pair->end = (uintptr_t) (kernel + 1);
662 struct hsa_kernel_description *d = &image_desc->kernel_infos[i];
663 kernel->agent = agent;
664 kernel->module = module;
665 kernel->name = d->name;
666 kernel->omp_data_size = d->omp_data_size;
667 kernel->gridified_kernel_p = d->gridified_kernel_p;
668 kernel->dependencies_count = d->kernel_dependencies_count;
669 kernel->dependencies = d->kernel_dependencies;
670 if (pthread_mutex_init (&kernel->init_mutex, NULL))
671 GOMP_PLUGIN_fatal ("Failed to initialize an HSA kernel mutex");
673 kernel++;
674 pair++;
677 add_module_to_agent (agent, module);
678 if (pthread_rwlock_unlock (&agent->modules_rwlock))
679 GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
680 return kernel_count;
683 /* Add a shared BRIG library from a FILE_NAME to an AGENT. */
685 static struct brig_library_info *
686 add_shared_library (const char *file_name, struct agent_info *agent)
688 struct brig_library_info *library = NULL;
690 void *f = dlopen (file_name, RTLD_NOW);
691 void *start = dlsym (f, "__brig_start");
692 void *end = dlsym (f, "__brig_end");
694 if (start == NULL || end == NULL)
695 return NULL;
697 unsigned size = end - start;
698 char *buf = (char *) GOMP_PLUGIN_malloc (size);
699 memcpy (buf, start, size);
701 library = GOMP_PLUGIN_malloc (sizeof (struct agent_info));
702 library->file_name = (char *) GOMP_PLUGIN_malloc
703 ((strlen (file_name) + 1));
704 strcpy (library->file_name, file_name);
705 library->image = (hsa_ext_module_t) buf;
707 return library;
710 /* Release memory used for BRIG shared libraries that correspond
711 to an AGENT. */
713 static void
714 release_agent_shared_libraries (struct agent_info *agent)
716 for (unsigned i = 0; i < agent->brig_libraries_count; i++)
717 if (agent->brig_libraries[i])
719 free (agent->brig_libraries[i]->file_name);
720 free (agent->brig_libraries[i]->image);
721 free (agent->brig_libraries[i]);
724 free (agent->brig_libraries);
727 /* Create and finalize the program consisting of all loaded modules. */
729 static void
730 create_and_finalize_hsa_program (struct agent_info *agent)
732 hsa_status_t status;
733 hsa_ext_program_t prog_handle;
734 int mi = 0;
736 if (pthread_mutex_lock (&agent->prog_mutex))
737 GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex");
738 if (agent->prog_finalized)
739 goto final;
741 status = hsa_ext_program_create (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
742 HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
743 NULL, &prog_handle);
744 if (status != HSA_STATUS_SUCCESS)
745 hsa_fatal ("Could not create an HSA program", status);
747 HSA_DEBUG ("Created a finalized program\n");
749 struct module_info *module = agent->first_module;
750 while (module)
752 status = hsa_ext_program_add_module (prog_handle,
753 module->image_desc->brig_module);
754 if (status != HSA_STATUS_SUCCESS)
755 hsa_fatal ("Could not add a module to the HSA program", status);
756 module = module->next;
757 mi++;
760 /* Load all shared libraries. */
761 const char *libraries[] = { "libhsamath.so", "libhsastd.so" };
762 const unsigned libraries_count = sizeof (libraries) / sizeof (const char *);
764 agent->brig_libraries_count = libraries_count;
765 agent->brig_libraries = GOMP_PLUGIN_malloc_cleared
766 (sizeof (struct brig_library_info) * libraries_count);
768 for (unsigned i = 0; i < libraries_count; i++)
770 struct brig_library_info *library = add_shared_library (libraries[i],
771 agent);
772 if (library == NULL)
774 HSA_WARNING ("Could not open a shared BRIG library: %s\n",
775 libraries[i]);
776 continue;
779 status = hsa_ext_program_add_module (prog_handle, library->image);
780 if (status != HSA_STATUS_SUCCESS)
781 hsa_warn ("Could not add a shared BRIG library the HSA program",
782 status);
783 else
784 HSA_DEBUG ("a shared BRIG library has been added to a program: %s\n",
785 libraries[i]);
788 hsa_ext_control_directives_t control_directives;
789 memset (&control_directives, 0, sizeof (control_directives));
790 hsa_code_object_t code_object;
791 status = hsa_ext_program_finalize (prog_handle, agent->isa,
792 HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
793 control_directives, "",
794 HSA_CODE_OBJECT_TYPE_PROGRAM,
795 &code_object);
796 if (status != HSA_STATUS_SUCCESS)
798 hsa_warn ("Finalization of the HSA program failed", status);
799 goto failure;
802 HSA_DEBUG ("Finalization done\n");
803 hsa_ext_program_destroy (prog_handle);
805 status
806 = hsa_executable_create (HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN,
807 "", &agent->executable);
808 if (status != HSA_STATUS_SUCCESS)
809 hsa_fatal ("Could not create HSA executable", status);
811 module = agent->first_module;
812 while (module)
814 /* Initialize all global variables declared in the module. */
815 for (unsigned i = 0; i < module->image_desc->global_variable_count; i++)
817 struct global_var_info *var;
818 var = &module->image_desc->global_variables[i];
819 status
820 = hsa_executable_global_variable_define (agent->executable,
821 var->name, var->address);
823 HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name,
824 var->address);
826 if (status != HSA_STATUS_SUCCESS)
827 hsa_fatal ("Could not define a global variable in the HSA program",
828 status);
831 module = module->next;
834 status = hsa_executable_load_code_object (agent->executable, agent->id,
835 code_object, "");
836 if (status != HSA_STATUS_SUCCESS)
837 hsa_fatal ("Could not add a code object to the HSA executable", status);
838 status = hsa_executable_freeze (agent->executable, "");
839 if (status != HSA_STATUS_SUCCESS)
840 hsa_fatal ("Could not freeze the HSA executable", status);
842 HSA_DEBUG ("Froze HSA executable with the finalized code object\n");
844 /* If all goes good, jump to final. */
845 goto final;
847 failure:
848 agent->prog_finalized_error = true;
850 final:
851 agent->prog_finalized = true;
853 if (pthread_mutex_unlock (&agent->prog_mutex))
854 GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
857 /* Create kernel dispatch data structure for given KERNEL. */
859 static struct GOMP_hsa_kernel_dispatch *
860 create_single_kernel_dispatch (struct kernel_info *kernel,
861 unsigned omp_data_size)
863 struct agent_info *agent = kernel->agent;
864 struct GOMP_hsa_kernel_dispatch *shadow
865 = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch));
867 shadow->queue = agent->command_q;
868 shadow->omp_data_memory
869 = omp_data_size > 0 ? GOMP_PLUGIN_malloc (omp_data_size) : NULL;
870 unsigned dispatch_count = kernel->dependencies_count;
871 shadow->kernel_dispatch_count = dispatch_count;
873 shadow->children_dispatches
874 = GOMP_PLUGIN_malloc (dispatch_count * sizeof (shadow));
876 shadow->object = kernel->object;
878 hsa_signal_t sync_signal;
879 hsa_status_t status = hsa_signal_create (1, 0, NULL, &sync_signal);
880 if (status != HSA_STATUS_SUCCESS)
881 hsa_fatal ("Error creating the HSA sync signal", status);
883 shadow->signal = sync_signal.handle;
884 shadow->private_segment_size = kernel->private_segment_size;
885 shadow->group_segment_size = kernel->group_segment_size;
887 status
888 = hsa_memory_allocate (agent->kernarg_region, kernel->kernarg_segment_size,
889 &shadow->kernarg_address);
890 if (status != HSA_STATUS_SUCCESS)
891 hsa_fatal ("Could not allocate memory for HSA kernel arguments", status);
893 return shadow;
896 /* Release data structure created for a kernel dispatch in SHADOW argument. */
898 static void
899 release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow)
901 HSA_DEBUG ("Released kernel dispatch: %p has value: %lu (%p)\n", shadow,
902 shadow->debug, (void *) shadow->debug);
904 hsa_memory_free (shadow->kernarg_address);
906 hsa_signal_t s;
907 s.handle = shadow->signal;
908 hsa_signal_destroy (s);
910 free (shadow->omp_data_memory);
912 for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
913 release_kernel_dispatch (shadow->children_dispatches[i]);
915 free (shadow->children_dispatches);
916 free (shadow);
919 /* Initialize a KERNEL without its dependencies. MAX_OMP_DATA_SIZE is used
920 to calculate maximum necessary memory for OMP data allocation. */
922 static void
923 init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
925 hsa_status_t status;
926 struct agent_info *agent = kernel->agent;
927 hsa_executable_symbol_t kernel_symbol;
928 status = hsa_executable_get_symbol (agent->executable, NULL, kernel->name,
929 agent->id, 0, &kernel_symbol);
930 if (status != HSA_STATUS_SUCCESS)
932 hsa_warn ("Could not find symbol for kernel in the code object", status);
933 goto failure;
935 HSA_DEBUG ("Located kernel %s\n", kernel->name);
936 status
937 = hsa_executable_symbol_get_info (kernel_symbol,
938 HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
939 &kernel->object);
940 if (status != HSA_STATUS_SUCCESS)
941 hsa_fatal ("Could not extract a kernel object from its symbol", status);
942 status = hsa_executable_symbol_get_info
943 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
944 &kernel->kernarg_segment_size);
945 if (status != HSA_STATUS_SUCCESS)
946 hsa_fatal ("Could not get info about kernel argument size", status);
947 status = hsa_executable_symbol_get_info
948 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
949 &kernel->group_segment_size);
950 if (status != HSA_STATUS_SUCCESS)
951 hsa_fatal ("Could not get info about kernel group segment size", status);
952 status = hsa_executable_symbol_get_info
953 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
954 &kernel->private_segment_size);
955 if (status != HSA_STATUS_SUCCESS)
956 hsa_fatal ("Could not get info about kernel private segment size",
957 status);
959 HSA_DEBUG ("Kernel structure for %s fully initialized with "
960 "following segment sizes: \n", kernel->name);
961 HSA_DEBUG (" group_segment_size: %u\n",
962 (unsigned) kernel->group_segment_size);
963 HSA_DEBUG (" private_segment_size: %u\n",
964 (unsigned) kernel->private_segment_size);
965 HSA_DEBUG (" kernarg_segment_size: %u\n",
966 (unsigned) kernel->kernarg_segment_size);
967 HSA_DEBUG (" omp_data_size: %u\n", kernel->omp_data_size);
968 HSA_DEBUG (" gridified_kernel_p: %u\n", kernel->gridified_kernel_p);
970 if (kernel->omp_data_size > *max_omp_data_size)
971 *max_omp_data_size = kernel->omp_data_size;
973 for (unsigned i = 0; i < kernel->dependencies_count; i++)
975 struct kernel_info *dependency
976 = get_kernel_for_agent (agent, kernel->dependencies[i]);
978 if (dependency == NULL)
980 HSA_DEBUG ("Could not find a dependency for a kernel: %s, "
981 "dependency name: %s\n", kernel->name,
982 kernel->dependencies[i]);
983 goto failure;
986 if (dependency->dependencies_count > 0)
988 HSA_DEBUG ("HSA does not allow kernel dispatching code with "
989 "a depth bigger than one\n")
990 goto failure;
993 init_single_kernel (dependency, max_omp_data_size);
996 return;
998 failure:
999 kernel->initialization_failed = true;
1002 /* Indent stream F by INDENT spaces. */
1004 static void
1005 indent_stream (FILE *f, unsigned indent)
1007 fprintf (f, "%*s", indent, "");
1010 /* Dump kernel DISPATCH data structure and indent it by INDENT spaces. */
1012 static void
1013 print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch, unsigned indent)
1015 indent_stream (stderr, indent);
1016 fprintf (stderr, "this: %p\n", dispatch);
1017 indent_stream (stderr, indent);
1018 fprintf (stderr, "queue: %p\n", dispatch->queue);
1019 indent_stream (stderr, indent);
1020 fprintf (stderr, "omp_data_memory: %p\n", dispatch->omp_data_memory);
1021 indent_stream (stderr, indent);
1022 fprintf (stderr, "kernarg_address: %p\n", dispatch->kernarg_address);
1023 indent_stream (stderr, indent);
1024 fprintf (stderr, "object: %lu\n", dispatch->object);
1025 indent_stream (stderr, indent);
1026 fprintf (stderr, "signal: %lu\n", dispatch->signal);
1027 indent_stream (stderr, indent);
1028 fprintf (stderr, "private_segment_size: %u\n",
1029 dispatch->private_segment_size);
1030 indent_stream (stderr, indent);
1031 fprintf (stderr, "group_segment_size: %u\n",
1032 dispatch->group_segment_size);
1033 indent_stream (stderr, indent);
1034 fprintf (stderr, "children dispatches: %lu\n",
1035 dispatch->kernel_dispatch_count);
1036 indent_stream (stderr, indent);
1037 fprintf (stderr, "omp_num_threads: %u\n",
1038 dispatch->omp_num_threads);
1039 fprintf (stderr, "\n");
1041 for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++)
1042 print_kernel_dispatch (dispatch->children_dispatches[i], indent + 2);
1045 /* Create kernel dispatch data structure for a KERNEL and all its
1046 dependencies. */
1048 static struct GOMP_hsa_kernel_dispatch *
1049 create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
1051 struct GOMP_hsa_kernel_dispatch *shadow
1052 = create_single_kernel_dispatch (kernel, omp_data_size);
1053 shadow->omp_num_threads = 64;
1054 shadow->debug = 0;
1055 shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0;
1057 /* Create kernel dispatch data structures. We do not allow to have
1058 a kernel dispatch with depth bigger than one. */
1059 for (unsigned i = 0; i < kernel->dependencies_count; i++)
1061 struct kernel_info *dependency
1062 = get_kernel_for_agent (kernel->agent, kernel->dependencies[i]);
1063 shadow->children_dispatches[i]
1064 = create_single_kernel_dispatch (dependency, omp_data_size);
1065 shadow->children_dispatches[i]->queue
1066 = kernel->agent->kernel_dispatch_command_q;
1067 shadow->children_dispatches[i]->omp_level = 1;
1070 return shadow;
1073 /* Do all the work that is necessary before running KERNEL for the first time.
1074 The function assumes the program has been created, finalized and frozen by
1075 create_and_finalize_hsa_program. */
1077 static void
1078 init_kernel (struct kernel_info *kernel)
1080 if (pthread_mutex_lock (&kernel->init_mutex))
1081 GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
1082 if (kernel->initialized)
1084 if (pthread_mutex_unlock (&kernel->init_mutex))
1085 GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1086 "mutex");
1088 return;
1091 /* Precomputed maximum size of OMP data necessary for a kernel from kernel
1092 dispatch operation. */
1093 init_single_kernel (kernel, &kernel->max_omp_data_size);
1095 if (!kernel->initialization_failed)
1096 HSA_DEBUG ("\n");
1098 kernel->initialized = true;
1099 if (pthread_mutex_unlock (&kernel->init_mutex))
1100 GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1101 "mutex");
1104 /* Parse the target attributes INPUT provided by the compiler and return true
1105 if we should run anything all. If INPUT is NULL, fill DEF with default
1106 values, then store INPUT or DEF into *RESULT. */
1108 static bool
1109 parse_target_attributes (void **input,
1110 struct GOMP_kernel_launch_attributes *def,
1111 struct GOMP_kernel_launch_attributes **result)
1113 if (!input)
1114 GOMP_PLUGIN_fatal ("No target arguments provided");
1116 bool attrs_found = false;
1117 while (*input)
1119 uintptr_t id = (uintptr_t) *input;
1120 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) == GOMP_DEVICE_HSA
1121 && ((id & GOMP_TARGET_ARG_ID_MASK)
1122 == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES))
1124 input++;
1125 attrs_found = true;
1126 break;
1129 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
1130 input++;
1131 input++;
1134 if (!attrs_found)
1136 def->ndim = 1;
1137 def->gdims[0] = 1;
1138 def->gdims[1] = 1;
1139 def->gdims[2] = 1;
1140 def->wdims[0] = 1;
1141 def->wdims[1] = 1;
1142 def->wdims[2] = 1;
1143 *result = def;
1144 HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n");
1145 return true;
1148 struct GOMP_kernel_launch_attributes *kla;
1149 kla = (struct GOMP_kernel_launch_attributes *) *input;
1150 *result = kla;
1151 if (kla->ndim != 1)
1152 GOMP_PLUGIN_fatal ("HSA does not yet support number of dimensions "
1153 "different from one.");
1154 if (kla->gdims[0] == 0)
1155 return false;
1157 HSA_DEBUG ("GOMP_OFFLOAD_run called with grid size %u and group size %u\n",
1158 kla->gdims[0], kla->wdims[0]);
1160 return true;
1163 /* Return true if the HSA runtime can run function FN_PTR. */
1165 bool
1166 GOMP_OFFLOAD_can_run (void *fn_ptr)
1168 struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1169 struct agent_info *agent = kernel->agent;
1170 create_and_finalize_hsa_program (agent);
1172 if (agent->prog_finalized_error)
1173 goto failure;
1175 init_kernel (kernel);
1176 if (kernel->initialization_failed)
1177 goto failure;
1179 return true;
1181 failure:
1182 if (suppress_host_fallback)
1183 GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed");
1184 HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n");
1185 return false;
1188 /* Part of the libgomp plugin interface. Run a kernel on device N and pass it
1189 an array of pointers in VARS as a parameter. The kernel is identified by
1190 FN_PTR which must point to a kernel_info structure. */
1192 void
1193 GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
1195 struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1196 struct agent_info *agent = kernel->agent;
1197 struct GOMP_kernel_launch_attributes def;
1198 struct GOMP_kernel_launch_attributes *kla;
1199 if (!parse_target_attributes (args, &def, &kla))
1201 HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
1202 return;
1204 if (pthread_rwlock_rdlock (&agent->modules_rwlock))
1205 GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
1207 if (!agent->initialized)
1208 GOMP_PLUGIN_fatal ("Agent must be initialized");
1210 if (!kernel->initialized)
1211 GOMP_PLUGIN_fatal ("Called kernel must be initialized");
1213 struct GOMP_hsa_kernel_dispatch *shadow
1214 = create_kernel_dispatch (kernel, kernel->max_omp_data_size);
1216 if (debug)
1218 fprintf (stderr, "\nKernel has following dependencies:\n");
1219 print_kernel_dispatch (shadow, 2);
1222 uint64_t index = hsa_queue_add_write_index_release (agent->command_q, 1);
1223 HSA_DEBUG ("Got AQL index %llu\n", (long long int) index);
1225 /* Wait until the queue is not full before writing the packet. */
1226 while (index - hsa_queue_load_read_index_acquire (agent->command_q)
1227 >= agent->command_q->size)
1230 hsa_kernel_dispatch_packet_t *packet;
1231 packet = ((hsa_kernel_dispatch_packet_t *) agent->command_q->base_address)
1232 + index % agent->command_q->size;
1234 memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4);
1235 packet->setup |= (uint16_t) 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
1236 packet->grid_size_x = kla->gdims[0];
1237 uint32_t wgs = kla->wdims[0];
1238 if (wgs == 0)
1239 /* TODO: Provide a default via environment. */
1240 wgs = 64;
1241 else if (wgs > kla->gdims[0])
1242 wgs = kla->gdims[0];
1243 packet->workgroup_size_x = wgs;
1244 packet->grid_size_y = 1;
1245 packet->workgroup_size_y = 1;
1246 packet->grid_size_z = 1;
1247 packet->workgroup_size_z = 1;
1248 packet->private_segment_size = kernel->private_segment_size;
1249 packet->group_segment_size = kernel->group_segment_size;
1250 packet->kernel_object = kernel->object;
1251 packet->kernarg_address = shadow->kernarg_address;
1252 hsa_signal_t s;
1253 s.handle = shadow->signal;
1254 packet->completion_signal = s;
1255 hsa_signal_store_relaxed (s, 1);
1256 memcpy (shadow->kernarg_address, &vars, sizeof (vars));
1258 memcpy (shadow->kernarg_address + sizeof (vars), &shadow,
1259 sizeof (struct hsa_kernel_runtime *));
1261 HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n");
1263 uint16_t header;
1264 header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
1265 header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
1266 header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
1268 HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name);
1270 __atomic_store_n ((uint16_t *) (&packet->header), header, __ATOMIC_RELEASE);
1271 hsa_signal_store_release (agent->command_q->doorbell_signal, index);
1273 /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for
1274 signal wait and signal load operations on their own and we need to
1275 periodically call the hsa_signal_load_acquire on completion signals of
1276 children kernels in the CPU to make that happen. As soon the
1277 limitation will be resolved, this workaround can be removed. */
1279 HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
1281 /* Root signal waits with 1ms timeout. */
1282 while (hsa_signal_wait_acquire (s, HSA_SIGNAL_CONDITION_LT, 1, 1000 * 1000,
1283 HSA_WAIT_STATE_BLOCKED) != 0)
1284 for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
1286 hsa_signal_t child_s;
1287 child_s.handle = shadow->children_dispatches[i]->signal;
1289 HSA_DEBUG ("Waiting for children completion signal: %lu\n",
1290 shadow->children_dispatches[i]->signal);
1291 hsa_signal_load_acquire (child_s);
1294 release_kernel_dispatch (shadow);
1296 if (pthread_rwlock_unlock (&agent->modules_rwlock))
1297 GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
1300 /* Information to be passed to a thread running a kernel asycnronously. */
1302 struct async_run_info
1304 int device;
1305 void *tgt_fn;
1306 void *tgt_vars;
1307 void **args;
1308 void *async_data;
1311 /* Thread routine to run a kernel asynchronously. */
1313 static void *
1314 run_kernel_asynchronously (void *thread_arg)
1316 struct async_run_info *info = (struct async_run_info *) thread_arg;
1317 int device = info->device;
1318 void *tgt_fn = info->tgt_fn;
1319 void *tgt_vars = info->tgt_vars;
1320 void **args = info->args;
1321 void *async_data = info->async_data;
1323 free (info);
1324 GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
1325 GOMP_PLUGIN_target_task_completion (async_data);
1326 return NULL;
1329 /* Part of the libgomp plugin interface. Run a kernel like GOMP_OFFLOAD_run
1330 does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
1331 has finished. */
1333 void
1334 GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
1335 void **args, void *async_data)
1337 pthread_t pt;
1338 struct async_run_info *info;
1339 HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n")
1340 info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
1342 info->device = device;
1343 info->tgt_fn = tgt_fn;
1344 info->tgt_vars = tgt_vars;
1345 info->args = args;
1346 info->async_data = async_data;
1348 int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info);
1349 if (err != 0)
1350 GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
1351 strerror (err));
1352 err = pthread_detach (pt);
1353 if (err != 0)
1354 GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
1355 "asynchronously: %s", strerror (err));
1358 /* Deinitialize all information associated with MODULE and kernels within
1359 it. */
1361 void
1362 destroy_module (struct module_info *module)
1364 int i;
1365 for (i = 0; i < module->kernel_count; i++)
1366 if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
1367 GOMP_PLUGIN_fatal ("Failed to destroy an HSA kernel initialization "
1368 "mutex");
1371 /* Part of the libgomp plugin interface. Unload BRIG module described by
1372 struct brig_image_desc in TARGET_DATA from agent number N. */
1374 void
1375 GOMP_OFFLOAD_unload_image (int n, unsigned version, void *target_data)
1377 if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
1378 GOMP_PLUGIN_fatal ("Offload data incompatible with HSA plugin"
1379 " (expected %u, received %u)",
1380 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
1382 struct agent_info *agent;
1383 agent = get_agent_info (n);
1384 if (pthread_rwlock_wrlock (&agent->modules_rwlock))
1385 GOMP_PLUGIN_fatal ("Unable to write-lock an HSA agent rwlock");
1387 struct module_info *module = agent->first_module;
1388 while (module)
1390 if (module->image_desc == target_data)
1391 break;
1392 module = module->next;
1394 if (!module)
1395 GOMP_PLUGIN_fatal ("Attempt to unload an image that has never been "
1396 "loaded before");
1398 remove_module_from_agent (agent, module);
1399 destroy_module (module);
1400 free (module);
1401 destroy_hsa_program (agent);
1402 if (pthread_rwlock_unlock (&agent->modules_rwlock))
1403 GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
1406 /* Part of the libgomp plugin interface. Deinitialize all information and
1407 status associated with agent number N. We do not attempt any
1408 synchronization, assuming the user and libgomp will not attempt
1409 deinitialization of a device that is in any way being used at the same
1410 time. */
1412 void
1413 GOMP_OFFLOAD_fini_device (int n)
1415 struct agent_info *agent = get_agent_info (n);
1416 if (!agent->initialized)
1417 return;
1419 struct module_info *next_module = agent->first_module;
1420 while (next_module)
1422 struct module_info *module = next_module;
1423 next_module = module->next;
1424 destroy_module (module);
1425 free (module);
1427 agent->first_module = NULL;
1428 destroy_hsa_program (agent);
1430 release_agent_shared_libraries (agent);
1432 hsa_status_t status = hsa_queue_destroy (agent->command_q);
1433 if (status != HSA_STATUS_SUCCESS)
1434 hsa_fatal ("Error destroying command queue", status);
1435 status = hsa_queue_destroy (agent->kernel_dispatch_command_q);
1436 if (status != HSA_STATUS_SUCCESS)
1437 hsa_fatal ("Error destroying kernel dispatch command queue", status);
1438 if (pthread_mutex_destroy (&agent->prog_mutex))
1439 GOMP_PLUGIN_fatal ("Failed to destroy an HSA agent program mutex");
1440 if (pthread_rwlock_destroy (&agent->modules_rwlock))
1441 GOMP_PLUGIN_fatal ("Failed to destroy an HSA agent rwlock");
1442 agent->initialized = false;
1445 /* Part of the libgomp plugin interface. Not implemented as it is not required
1446 for HSA. */
1448 void *
1449 GOMP_OFFLOAD_alloc (int ord, size_t size)
1451 GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_alloc is not implemented because "
1452 "it should never be called");
1455 /* Part of the libgomp plugin interface. Not implemented as it is not required
1456 for HSA. */
1458 void
1459 GOMP_OFFLOAD_free (int ord, void *ptr)
1461 GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_free is not implemented because "
1462 "it should never be called");
1465 /* Part of the libgomp plugin interface. Not implemented as it is not required
1466 for HSA. */
1468 void *
1469 GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
1471 GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
1472 "it should never be called");
1475 /* Part of the libgomp plugin interface. Not implemented as it is not required
1476 for HSA. */
1478 void *
1479 GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
1481 GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
1482 "it should never be called");
1485 /* Part of the libgomp plugin interface. Not implemented as it is not required
1486 for HSA. */
1488 void *
1489 GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
1491 GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
1492 "it should never be called");