Fix finding of a first match predictor
[official-gcc.git] / libgomp / plugin / plugin-hsa.c
blobbed8555fb90eb3adf4b2607849ac8e2fc9e890ab
1 /* Plugin for HSAIL execution.
3 Copyright (C) 2013-2016 Free Software Foundation, Inc.
5 Contributed by Martin Jambor <mjambor@suse.cz> and
6 Martin Liska <mliska@suse.cz>.
8 This file is part of the GNU Offloading and Multi Processing Library
9 (libgomp).
11 Libgomp is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 3, or (at your option)
14 any later version.
16 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
17 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 more details.
21 Under Section 7 of GPL version 3, you are granted additional
22 permissions described in the GCC Runtime Library Exception, version
23 3.1, as published by the Free Software Foundation.
25 You should have received a copy of the GNU General Public License and
26 a copy of the GCC Runtime Library Exception along with this program;
27 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
28 <http://www.gnu.org/licenses/>. */
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <pthread.h>
34 #include <hsa.h>
35 #include <hsa_ext_finalize.h>
36 #include <dlfcn.h>
37 #include "libgomp-plugin.h"
38 #include "gomp-constants.h"
40 /* Keep the following GOMP prefixed structures in sync with respective parts of
41 the compiler. */
43 /* Structure describing the run-time and grid properties of an HSA kernel
44 lauch. */
46 struct GOMP_kernel_launch_attributes
48 /* Number of dimensions the workload has. Maximum number is 3. */
49 uint32_t ndim;
50 /* Size of the grid in the three respective dimensions. */
51 uint32_t gdims[3];
52 /* Size of work-groups in the respective dimensions. */
53 uint32_t wdims[3];
56 /* Collection of information needed for a dispatch of a kernel from a
57 kernel. */
59 struct GOMP_hsa_kernel_dispatch
61 /* Pointer to a command queue associated with a kernel dispatch agent. */
62 void *queue;
63 /* Pointer to reserved memory for OMP data struct copying. */
64 void *omp_data_memory;
65 /* Pointer to a memory space used for kernel arguments passing. */
66 void *kernarg_address;
67 /* Kernel object. */
68 uint64_t object;
69 /* Synchronization signal used for dispatch synchronization. */
70 uint64_t signal;
71 /* Private segment size. */
72 uint32_t private_segment_size;
73 /* Group segment size. */
74 uint32_t group_segment_size;
75 /* Number of children kernel dispatches. */
76 uint64_t kernel_dispatch_count;
77 /* Debug purpose argument. */
78 uint64_t debug;
79 /* Levels-var ICV. */
80 uint64_t omp_level;
81 /* Kernel dispatch structures created for children kernel dispatches. */
82 struct GOMP_hsa_kernel_dispatch **children_dispatches;
83 /* Number of threads. */
84 uint32_t omp_num_threads;
87 /* Part of the libgomp plugin interface. Return the name of the accelerator,
88 which is "hsa". */
90 const char *
91 GOMP_OFFLOAD_get_name (void)
93 return "hsa";
96 /* Part of the libgomp plugin interface. Return the specific capabilities the
97 HSA accelerator have. */
99 unsigned int
100 GOMP_OFFLOAD_get_caps (void)
102 return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400;
105 /* Part of the libgomp plugin interface. Identify as HSA accelerator. */
108 GOMP_OFFLOAD_get_type (void)
110 return OFFLOAD_TARGET_TYPE_HSA;
113 /* Return the libgomp version number we're compatible with. There is
114 no requirement for cross-version compatibility. */
116 unsigned
117 GOMP_OFFLOAD_version (void)
119 return GOMP_VERSION;
122 /* Flag to decide whether print to stderr information about what is going on.
123 Set in init_debug depending on environment variables. */
125 static bool debug;
127 /* Flag to decide if the runtime should suppress a possible fallback to host
128 execution. */
130 static bool suppress_host_fallback;
132 /* Initialize debug and suppress_host_fallback according to the environment. */
134 static void
135 init_enviroment_variables (void)
137 if (getenv ("HSA_DEBUG"))
138 debug = true;
139 else
140 debug = false;
142 if (getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
143 suppress_host_fallback = true;
144 else
145 suppress_host_fallback = false;
148 /* Print a logging message with PREFIX to stderr if HSA_DEBUG value
149 is set to true. */
151 #define HSA_LOG(prefix, ...) \
152 do \
154 if (debug) \
156 fprintf (stderr, prefix); \
157 fprintf (stderr, __VA_ARGS__); \
160 while (false);
162 /* Print a debugging message to stderr. */
164 #define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__)
166 /* Print a warning message to stderr. */
168 #define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__)
170 /* Print HSA warning STR with an HSA STATUS code. */
172 static void
173 hsa_warn (const char *str, hsa_status_t status)
175 if (!debug)
176 return;
178 const char *hsa_error_msg;
179 hsa_status_string (status, &hsa_error_msg);
181 fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg);
184 /* Report a fatal error STR together with the HSA error corresponding to STATUS
185 and terminate execution of the current process. */
187 static void
188 hsa_fatal (const char *str, hsa_status_t status)
190 const char *hsa_error_msg;
191 hsa_status_string (status, &hsa_error_msg);
192 GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
193 hsa_error_msg);
196 /* Like hsa_fatal, except only report error message, and return FALSE
197 for propagating error processing to outside of plugin. */
199 static bool
200 hsa_error (const char *str, hsa_status_t status)
202 const char *hsa_error_msg;
203 hsa_status_string (status, &hsa_error_msg);
204 GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str,
205 hsa_error_msg);
206 return false;
209 struct hsa_kernel_description
211 const char *name;
212 unsigned omp_data_size;
213 bool gridified_kernel_p;
214 unsigned kernel_dependencies_count;
215 const char **kernel_dependencies;
218 struct global_var_info
220 const char *name;
221 void *address;
224 /* Data passed by the static initializer of a compilation unit containing BRIG
225 to GOMP_offload_register. */
227 struct brig_image_desc
229 hsa_ext_module_t brig_module;
230 const unsigned kernel_count;
231 struct hsa_kernel_description *kernel_infos;
232 const unsigned global_variable_count;
233 struct global_var_info *global_variables;
236 struct agent_info;
238 /* Information required to identify, finalize and run any given kernel. */
240 struct kernel_info
242 /* Name of the kernel, required to locate it within the brig module. */
243 const char *name;
244 /* Size of memory space for OMP data. */
245 unsigned omp_data_size;
246 /* The specific agent the kernel has been or will be finalized for and run
247 on. */
248 struct agent_info *agent;
249 /* The specific module where the kernel takes place. */
250 struct module_info *module;
251 /* Mutex enforcing that at most once thread ever initializes a kernel for
252 use. A thread should have locked agent->modules_rwlock for reading before
253 acquiring it. */
254 pthread_mutex_t init_mutex;
255 /* Flag indicating whether the kernel has been initialized and all fields
256 below it contain valid data. */
257 bool initialized;
258 /* Flag indicating that the kernel has a problem that blocks an execution. */
259 bool initialization_failed;
260 /* The object to be put into the dispatch queue. */
261 uint64_t object;
262 /* Required size of kernel arguments. */
263 uint32_t kernarg_segment_size;
264 /* Required size of group segment. */
265 uint32_t group_segment_size;
266 /* Required size of private segment. */
267 uint32_t private_segment_size;
268 /* List of all kernel dependencies. */
269 const char **dependencies;
270 /* Number of dependencies. */
271 unsigned dependencies_count;
272 /* Maximum OMP data size necessary for kernel from kernel dispatches. */
273 unsigned max_omp_data_size;
274 /* True if the kernel is gridified. */
275 bool gridified_kernel_p;
278 /* Information about a particular brig module, its image and kernels. */
280 struct module_info
282 /* The next and previous module in the linked list of modules of an agent. */
283 struct module_info *next, *prev;
284 /* The description with which the program has registered the image. */
285 struct brig_image_desc *image_desc;
287 /* Number of kernels in this module. */
288 int kernel_count;
289 /* An array of kernel_info structures describing each kernel in this
290 module. */
291 struct kernel_info kernels[];
294 /* Information about shared brig library. */
296 struct brig_library_info
298 char *file_name;
299 hsa_ext_module_t image;
302 /* Description of an HSA GPU agent and the program associated with it. */
304 struct agent_info
306 /* The HSA ID of the agent. Assigned when hsa_context is initialized. */
307 hsa_agent_t id;
308 /* Whether the agent has been initialized. The fields below are usable only
309 if it has been. */
310 bool initialized;
311 /* The HSA ISA of this agent. */
312 hsa_isa_t isa;
313 /* Command queue of the agent. */
314 hsa_queue_t *command_q;
315 /* Kernel from kernel dispatch command queue. */
316 hsa_queue_t *kernel_dispatch_command_q;
317 /* The HSA memory region from which to allocate kernel arguments. */
318 hsa_region_t kernarg_region;
320 /* Read-write lock that protects kernels which are running or about to be run
321 from interference with loading and unloading of images. Needs to be
322 locked for reading while a kernel is being run, and for writing if the
323 list of modules is manipulated (and thus the HSA program invalidated). */
324 pthread_rwlock_t modules_rwlock;
325 /* The first module in a linked list of modules associated with this
326 kernel. */
327 struct module_info *first_module;
329 /* Mutex enforcing that only one thread will finalize the HSA program. A
330 thread should have locked agent->modules_rwlock for reading before
331 acquiring it. */
332 pthread_mutex_t prog_mutex;
333 /* Flag whether the HSA program that consists of all the modules has been
334 finalized. */
335 bool prog_finalized;
336 /* Flag whether the program was finalized but with a failure. */
337 bool prog_finalized_error;
338 /* HSA executable - the finalized program that is used to locate kernels. */
339 hsa_executable_t executable;
340 /* List of BRIG libraries. */
341 struct brig_library_info **brig_libraries;
342 /* Number of loaded shared BRIG libraries. */
343 unsigned brig_libraries_count;
346 /* Information about the whole HSA environment and all of its agents. */
348 struct hsa_context_info
350 /* Whether the structure has been initialized. */
351 bool initialized;
352 /* Number of usable GPU HSA agents in the system. */
353 int agent_count;
354 /* Array of agent_info structures describing the individual HSA agents. */
355 struct agent_info *agents;
358 /* Information about the whole HSA environment and all of its agents. */
360 static struct hsa_context_info hsa_context;
362 /* Find kernel for an AGENT by name provided in KERNEL_NAME. */
364 static struct kernel_info *
365 get_kernel_for_agent (struct agent_info *agent, const char *kernel_name)
367 struct module_info *module = agent->first_module;
369 while (module)
371 for (unsigned i = 0; i < module->kernel_count; i++)
372 if (strcmp (module->kernels[i].name, kernel_name) == 0)
373 return &module->kernels[i];
375 module = module->next;
378 return NULL;
381 /* Return true if the agent is a GPU and acceptable of concurrent submissions
382 from different threads. */
384 static bool
385 suitable_hsa_agent_p (hsa_agent_t agent)
387 hsa_device_type_t device_type;
388 hsa_status_t status
389 = hsa_agent_get_info (agent, HSA_AGENT_INFO_DEVICE, &device_type);
390 if (status != HSA_STATUS_SUCCESS || device_type != HSA_DEVICE_TYPE_GPU)
391 return false;
393 uint32_t features = 0;
394 status = hsa_agent_get_info (agent, HSA_AGENT_INFO_FEATURE, &features);
395 if (status != HSA_STATUS_SUCCESS
396 || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH))
397 return false;
398 hsa_queue_type_t queue_type;
399 status = hsa_agent_get_info (agent, HSA_AGENT_INFO_QUEUE_TYPE, &queue_type);
400 if (status != HSA_STATUS_SUCCESS
401 || (queue_type != HSA_QUEUE_TYPE_MULTI))
402 return false;
404 return true;
407 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment
408 agent_count in hsa_context. */
410 static hsa_status_t
411 count_gpu_agents (hsa_agent_t agent, void *data __attribute__ ((unused)))
413 if (suitable_hsa_agent_p (agent))
414 hsa_context.agent_count++;
415 return HSA_STATUS_SUCCESS;
418 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent
419 id to the describing structure in the hsa context. The index of the
420 structure is pointed to by DATA, increment it afterwards. */
422 static hsa_status_t
423 assign_agent_ids (hsa_agent_t agent, void *data)
425 if (suitable_hsa_agent_p (agent))
427 int *agent_index = (int *) data;
428 hsa_context.agents[*agent_index].id = agent;
429 ++*agent_index;
431 return HSA_STATUS_SUCCESS;
434 /* Initialize hsa_context if it has not already been done.
435 Return TRUE on success. */
437 static bool
438 init_hsa_context (void)
440 hsa_status_t status;
441 int agent_index = 0;
443 if (hsa_context.initialized)
444 return true;
445 init_enviroment_variables ();
446 status = hsa_init ();
447 if (status != HSA_STATUS_SUCCESS)
448 return hsa_error ("Run-time could not be initialized", status);
449 HSA_DEBUG ("HSA run-time initialized\n");
450 status = hsa_iterate_agents (count_gpu_agents, NULL);
451 if (status != HSA_STATUS_SUCCESS)
452 return hsa_error ("HSA GPU devices could not be enumerated", status);
453 HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
455 hsa_context.agents
456 = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
457 * sizeof (struct agent_info));
458 status = hsa_iterate_agents (assign_agent_ids, &agent_index);
459 if (agent_index != hsa_context.agent_count)
461 GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents");
462 return false;
464 hsa_context.initialized = true;
465 return true;
468 /* Callback of dispatch queues to report errors. */
470 static void
471 queue_callback (hsa_status_t status,
472 hsa_queue_t *queue __attribute__ ((unused)),
473 void *data __attribute__ ((unused)))
475 hsa_fatal ("Asynchronous queue error", status);
478 /* Callback of hsa_agent_iterate_regions. Determine if a memory REGION can be
479 used for kernarg allocations and if so write it to the memory pointed to by
480 DATA and break the query. */
482 static hsa_status_t
483 get_kernarg_memory_region (hsa_region_t region, void *data)
485 hsa_status_t status;
486 hsa_region_segment_t segment;
488 status = hsa_region_get_info (region, HSA_REGION_INFO_SEGMENT, &segment);
489 if (status != HSA_STATUS_SUCCESS)
490 return status;
491 if (segment != HSA_REGION_SEGMENT_GLOBAL)
492 return HSA_STATUS_SUCCESS;
494 uint32_t flags;
495 status = hsa_region_get_info (region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
496 if (status != HSA_STATUS_SUCCESS)
497 return status;
498 if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
500 hsa_region_t *ret = (hsa_region_t *) data;
501 *ret = region;
502 return HSA_STATUS_INFO_BREAK;
504 return HSA_STATUS_SUCCESS;
507 /* Part of the libgomp plugin interface. Return the number of HSA devices on
508 the system. */
511 GOMP_OFFLOAD_get_num_devices (void)
513 if (!init_hsa_context ())
514 return 0;
515 return hsa_context.agent_count;
518 /* Part of the libgomp plugin interface. Initialize agent number N so that it
519 can be used for computation. Return TRUE on success. */
521 bool
522 GOMP_OFFLOAD_init_device (int n)
524 if (!init_hsa_context ())
525 return false;
526 if (n >= hsa_context.agent_count)
528 GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n);
529 return false;
531 struct agent_info *agent = &hsa_context.agents[n];
533 if (agent->initialized)
534 return true;
536 if (pthread_rwlock_init (&agent->modules_rwlock, NULL))
538 GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock");
539 return false;
541 if (pthread_mutex_init (&agent->prog_mutex, NULL))
543 GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex");
544 return false;
547 uint32_t queue_size;
548 hsa_status_t status;
549 status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
550 &queue_size);
551 if (status != HSA_STATUS_SUCCESS)
552 return hsa_error ("Error requesting maximum queue size of the HSA agent",
553 status);
554 status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_ISA, &agent->isa);
555 if (status != HSA_STATUS_SUCCESS)
556 return hsa_error ("Error querying the ISA of the agent", status);
557 status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
558 queue_callback, NULL, UINT32_MAX, UINT32_MAX,
559 &agent->command_q);
560 if (status != HSA_STATUS_SUCCESS)
561 return hsa_error ("Error creating command queue", status);
563 status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
564 queue_callback, NULL, UINT32_MAX, UINT32_MAX,
565 &agent->kernel_dispatch_command_q);
566 if (status != HSA_STATUS_SUCCESS)
567 return hsa_error ("Error creating kernel dispatch command queue", status);
569 agent->kernarg_region.handle = (uint64_t) -1;
570 status = hsa_agent_iterate_regions (agent->id, get_kernarg_memory_region,
571 &agent->kernarg_region);
572 if (agent->kernarg_region.handle == (uint64_t) -1)
574 GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
575 "arguments");
576 return false;
578 HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
579 (long long unsigned) agent->command_q->id);
580 HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
581 (long long unsigned) agent->kernel_dispatch_command_q->id);
582 agent->initialized = true;
583 return true;
586 /* Verify that hsa_context has already been initialized and return the
587 agent_info structure describing device number N. Return NULL on error. */
589 static struct agent_info *
590 get_agent_info (int n)
592 if (!hsa_context.initialized)
594 GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context.");
595 return NULL;
597 if (n >= hsa_context.agent_count)
599 GOMP_PLUGIN_error ("Request to operate on anon-existing HSA device %i", n);
600 return NULL;
602 if (!hsa_context.agents[n].initialized)
604 GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent.");
605 return NULL;
607 return &hsa_context.agents[n];
610 /* Insert MODULE to the linked list of modules of AGENT. */
612 static void
613 add_module_to_agent (struct agent_info *agent, struct module_info *module)
615 if (agent->first_module)
616 agent->first_module->prev = module;
617 module->next = agent->first_module;
618 module->prev = NULL;
619 agent->first_module = module;
622 /* Remove MODULE from the linked list of modules of AGENT. */
624 static void
625 remove_module_from_agent (struct agent_info *agent, struct module_info *module)
627 if (agent->first_module == module)
628 agent->first_module = module->next;
629 if (module->prev)
630 module->prev->next = module->next;
631 if (module->next)
632 module->next->prev = module->prev;
635 /* Free the HSA program in agent and everything associated with it and set
636 agent->prog_finalized and the initialized flags of all kernels to false.
637 Return TRUE on success. */
639 static bool
640 destroy_hsa_program (struct agent_info *agent)
642 if (!agent->prog_finalized || agent->prog_finalized_error)
643 return true;
645 hsa_status_t status;
647 HSA_DEBUG ("Destroying the current HSA program.\n");
649 status = hsa_executable_destroy (agent->executable);
650 if (status != HSA_STATUS_SUCCESS)
651 return hsa_error ("Could not destroy HSA executable", status);
653 struct module_info *module;
654 for (module = agent->first_module; module; module = module->next)
656 int i;
657 for (i = 0; i < module->kernel_count; i++)
658 module->kernels[i].initialized = false;
660 agent->prog_finalized = false;
661 return true;
664 /* Part of the libgomp plugin interface. Load BRIG module described by struct
665 brig_image_desc in TARGET_DATA and return references to kernel descriptors
666 in TARGET_TABLE. */
669 GOMP_OFFLOAD_load_image (int ord, unsigned version, void *target_data,
670 struct addr_pair **target_table)
672 if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
674 GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
675 " (expected %u, received %u)",
676 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
677 return -1;
680 struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
681 struct agent_info *agent;
682 struct addr_pair *pair;
683 struct module_info *module;
684 struct kernel_info *kernel;
685 int kernel_count = image_desc->kernel_count;
687 agent = get_agent_info (ord);
688 if (!agent)
689 return -1;
691 if (pthread_rwlock_wrlock (&agent->modules_rwlock))
693 GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
694 return -1;
696 if (agent->prog_finalized
697 && !destroy_hsa_program (agent))
698 return -1;
700 HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
701 pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
702 *target_table = pair;
703 module = (struct module_info *)
704 GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info)
705 + kernel_count * sizeof (struct kernel_info));
706 module->image_desc = image_desc;
707 module->kernel_count = kernel_count;
709 kernel = &module->kernels[0];
711 /* Allocate memory for kernel dependencies. */
712 for (unsigned i = 0; i < kernel_count; i++)
714 pair->start = (uintptr_t) kernel;
715 pair->end = (uintptr_t) (kernel + 1);
717 struct hsa_kernel_description *d = &image_desc->kernel_infos[i];
718 kernel->agent = agent;
719 kernel->module = module;
720 kernel->name = d->name;
721 kernel->omp_data_size = d->omp_data_size;
722 kernel->gridified_kernel_p = d->gridified_kernel_p;
723 kernel->dependencies_count = d->kernel_dependencies_count;
724 kernel->dependencies = d->kernel_dependencies;
725 if (pthread_mutex_init (&kernel->init_mutex, NULL))
727 GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
728 return -1;
731 kernel++;
732 pair++;
735 add_module_to_agent (agent, module);
736 if (pthread_rwlock_unlock (&agent->modules_rwlock))
738 GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
739 return -1;
741 return kernel_count;
744 /* Add a shared BRIG library from a FILE_NAME to an AGENT. */
746 static struct brig_library_info *
747 add_shared_library (const char *file_name, struct agent_info *agent)
749 struct brig_library_info *library = NULL;
751 void *f = dlopen (file_name, RTLD_NOW);
752 void *start = dlsym (f, "__brig_start");
753 void *end = dlsym (f, "__brig_end");
755 if (start == NULL || end == NULL)
756 return NULL;
758 unsigned size = end - start;
759 char *buf = (char *) GOMP_PLUGIN_malloc (size);
760 memcpy (buf, start, size);
762 library = GOMP_PLUGIN_malloc (sizeof (struct agent_info));
763 library->file_name = (char *) GOMP_PLUGIN_malloc
764 ((strlen (file_name) + 1));
765 strcpy (library->file_name, file_name);
766 library->image = (hsa_ext_module_t) buf;
768 return library;
771 /* Release memory used for BRIG shared libraries that correspond
772 to an AGENT. */
774 static void
775 release_agent_shared_libraries (struct agent_info *agent)
777 for (unsigned i = 0; i < agent->brig_libraries_count; i++)
778 if (agent->brig_libraries[i])
780 free (agent->brig_libraries[i]->file_name);
781 free (agent->brig_libraries[i]->image);
782 free (agent->brig_libraries[i]);
785 free (agent->brig_libraries);
788 /* Create and finalize the program consisting of all loaded modules. */
790 static void
791 create_and_finalize_hsa_program (struct agent_info *agent)
793 hsa_status_t status;
794 hsa_ext_program_t prog_handle;
795 int mi = 0;
797 if (pthread_mutex_lock (&agent->prog_mutex))
798 GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex");
799 if (agent->prog_finalized)
800 goto final;
802 status = hsa_ext_program_create (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
803 HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
804 NULL, &prog_handle);
805 if (status != HSA_STATUS_SUCCESS)
806 hsa_fatal ("Could not create an HSA program", status);
808 HSA_DEBUG ("Created a finalized program\n");
810 struct module_info *module = agent->first_module;
811 while (module)
813 status = hsa_ext_program_add_module (prog_handle,
814 module->image_desc->brig_module);
815 if (status != HSA_STATUS_SUCCESS)
816 hsa_fatal ("Could not add a module to the HSA program", status);
817 module = module->next;
818 mi++;
821 /* Load all shared libraries. */
822 const char *libraries[] = { "libhsamath.so", "libhsastd.so" };
823 const unsigned libraries_count = sizeof (libraries) / sizeof (const char *);
825 agent->brig_libraries_count = libraries_count;
826 agent->brig_libraries = GOMP_PLUGIN_malloc_cleared
827 (sizeof (struct brig_library_info) * libraries_count);
829 for (unsigned i = 0; i < libraries_count; i++)
831 struct brig_library_info *library = add_shared_library (libraries[i],
832 agent);
833 if (library == NULL)
835 HSA_WARNING ("Could not open a shared BRIG library: %s\n",
836 libraries[i]);
837 continue;
840 status = hsa_ext_program_add_module (prog_handle, library->image);
841 if (status != HSA_STATUS_SUCCESS)
842 hsa_warn ("Could not add a shared BRIG library the HSA program",
843 status);
844 else
845 HSA_DEBUG ("a shared BRIG library has been added to a program: %s\n",
846 libraries[i]);
849 hsa_ext_control_directives_t control_directives;
850 memset (&control_directives, 0, sizeof (control_directives));
851 hsa_code_object_t code_object;
852 status = hsa_ext_program_finalize (prog_handle, agent->isa,
853 HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
854 control_directives, "",
855 HSA_CODE_OBJECT_TYPE_PROGRAM,
856 &code_object);
857 if (status != HSA_STATUS_SUCCESS)
859 hsa_warn ("Finalization of the HSA program failed", status);
860 goto failure;
863 HSA_DEBUG ("Finalization done\n");
864 hsa_ext_program_destroy (prog_handle);
866 status
867 = hsa_executable_create (HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN,
868 "", &agent->executable);
869 if (status != HSA_STATUS_SUCCESS)
870 hsa_fatal ("Could not create HSA executable", status);
872 module = agent->first_module;
873 while (module)
875 /* Initialize all global variables declared in the module. */
876 for (unsigned i = 0; i < module->image_desc->global_variable_count; i++)
878 struct global_var_info *var;
879 var = &module->image_desc->global_variables[i];
880 status
881 = hsa_executable_global_variable_define (agent->executable,
882 var->name, var->address);
884 HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name,
885 var->address);
887 if (status != HSA_STATUS_SUCCESS)
888 hsa_fatal ("Could not define a global variable in the HSA program",
889 status);
892 module = module->next;
895 status = hsa_executable_load_code_object (agent->executable, agent->id,
896 code_object, "");
897 if (status != HSA_STATUS_SUCCESS)
898 hsa_fatal ("Could not add a code object to the HSA executable", status);
899 status = hsa_executable_freeze (agent->executable, "");
900 if (status != HSA_STATUS_SUCCESS)
901 hsa_fatal ("Could not freeze the HSA executable", status);
903 HSA_DEBUG ("Froze HSA executable with the finalized code object\n");
905 /* If all goes good, jump to final. */
906 goto final;
908 failure:
909 agent->prog_finalized_error = true;
911 final:
912 agent->prog_finalized = true;
914 if (pthread_mutex_unlock (&agent->prog_mutex))
915 GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
918 /* Create kernel dispatch data structure for given KERNEL. */
920 static struct GOMP_hsa_kernel_dispatch *
921 create_single_kernel_dispatch (struct kernel_info *kernel,
922 unsigned omp_data_size)
924 struct agent_info *agent = kernel->agent;
925 struct GOMP_hsa_kernel_dispatch *shadow
926 = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch));
928 shadow->queue = agent->command_q;
929 shadow->omp_data_memory
930 = omp_data_size > 0 ? GOMP_PLUGIN_malloc (omp_data_size) : NULL;
931 unsigned dispatch_count = kernel->dependencies_count;
932 shadow->kernel_dispatch_count = dispatch_count;
934 shadow->children_dispatches
935 = GOMP_PLUGIN_malloc (dispatch_count * sizeof (shadow));
937 shadow->object = kernel->object;
939 hsa_signal_t sync_signal;
940 hsa_status_t status = hsa_signal_create (1, 0, NULL, &sync_signal);
941 if (status != HSA_STATUS_SUCCESS)
942 hsa_fatal ("Error creating the HSA sync signal", status);
944 shadow->signal = sync_signal.handle;
945 shadow->private_segment_size = kernel->private_segment_size;
946 shadow->group_segment_size = kernel->group_segment_size;
948 status
949 = hsa_memory_allocate (agent->kernarg_region, kernel->kernarg_segment_size,
950 &shadow->kernarg_address);
951 if (status != HSA_STATUS_SUCCESS)
952 hsa_fatal ("Could not allocate memory for HSA kernel arguments", status);
954 return shadow;
957 /* Release data structure created for a kernel dispatch in SHADOW argument. */
959 static void
960 release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow)
962 HSA_DEBUG ("Released kernel dispatch: %p has value: %lu (%p)\n", shadow,
963 shadow->debug, (void *) shadow->debug);
965 hsa_memory_free (shadow->kernarg_address);
967 hsa_signal_t s;
968 s.handle = shadow->signal;
969 hsa_signal_destroy (s);
971 free (shadow->omp_data_memory);
973 for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
974 release_kernel_dispatch (shadow->children_dispatches[i]);
976 free (shadow->children_dispatches);
977 free (shadow);
980 /* Initialize a KERNEL without its dependencies. MAX_OMP_DATA_SIZE is used
981 to calculate maximum necessary memory for OMP data allocation. */
983 static void
984 init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
986 hsa_status_t status;
987 struct agent_info *agent = kernel->agent;
988 hsa_executable_symbol_t kernel_symbol;
989 status = hsa_executable_get_symbol (agent->executable, NULL, kernel->name,
990 agent->id, 0, &kernel_symbol);
991 if (status != HSA_STATUS_SUCCESS)
993 hsa_warn ("Could not find symbol for kernel in the code object", status);
994 goto failure;
996 HSA_DEBUG ("Located kernel %s\n", kernel->name);
997 status
998 = hsa_executable_symbol_get_info (kernel_symbol,
999 HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
1000 &kernel->object);
1001 if (status != HSA_STATUS_SUCCESS)
1002 hsa_fatal ("Could not extract a kernel object from its symbol", status);
1003 status = hsa_executable_symbol_get_info
1004 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
1005 &kernel->kernarg_segment_size);
1006 if (status != HSA_STATUS_SUCCESS)
1007 hsa_fatal ("Could not get info about kernel argument size", status);
1008 status = hsa_executable_symbol_get_info
1009 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
1010 &kernel->group_segment_size);
1011 if (status != HSA_STATUS_SUCCESS)
1012 hsa_fatal ("Could not get info about kernel group segment size", status);
1013 status = hsa_executable_symbol_get_info
1014 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
1015 &kernel->private_segment_size);
1016 if (status != HSA_STATUS_SUCCESS)
1017 hsa_fatal ("Could not get info about kernel private segment size",
1018 status);
1020 HSA_DEBUG ("Kernel structure for %s fully initialized with "
1021 "following segment sizes: \n", kernel->name);
1022 HSA_DEBUG (" group_segment_size: %u\n",
1023 (unsigned) kernel->group_segment_size);
1024 HSA_DEBUG (" private_segment_size: %u\n",
1025 (unsigned) kernel->private_segment_size);
1026 HSA_DEBUG (" kernarg_segment_size: %u\n",
1027 (unsigned) kernel->kernarg_segment_size);
1028 HSA_DEBUG (" omp_data_size: %u\n", kernel->omp_data_size);
1029 HSA_DEBUG (" gridified_kernel_p: %u\n", kernel->gridified_kernel_p);
1031 if (kernel->omp_data_size > *max_omp_data_size)
1032 *max_omp_data_size = kernel->omp_data_size;
1034 for (unsigned i = 0; i < kernel->dependencies_count; i++)
1036 struct kernel_info *dependency
1037 = get_kernel_for_agent (agent, kernel->dependencies[i]);
1039 if (dependency == NULL)
1041 HSA_DEBUG ("Could not find a dependency for a kernel: %s, "
1042 "dependency name: %s\n", kernel->name,
1043 kernel->dependencies[i]);
1044 goto failure;
1047 if (dependency->dependencies_count > 0)
1049 HSA_DEBUG ("HSA does not allow kernel dispatching code with "
1050 "a depth bigger than one\n")
1051 goto failure;
1054 init_single_kernel (dependency, max_omp_data_size);
1057 return;
1059 failure:
1060 kernel->initialization_failed = true;
1063 /* Indent stream F by INDENT spaces. */
1065 static void
1066 indent_stream (FILE *f, unsigned indent)
1068 fprintf (f, "%*s", indent, "");
1071 /* Dump kernel DISPATCH data structure and indent it by INDENT spaces. */
1073 static void
1074 print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch, unsigned indent)
1076 indent_stream (stderr, indent);
1077 fprintf (stderr, "this: %p\n", dispatch);
1078 indent_stream (stderr, indent);
1079 fprintf (stderr, "queue: %p\n", dispatch->queue);
1080 indent_stream (stderr, indent);
1081 fprintf (stderr, "omp_data_memory: %p\n", dispatch->omp_data_memory);
1082 indent_stream (stderr, indent);
1083 fprintf (stderr, "kernarg_address: %p\n", dispatch->kernarg_address);
1084 indent_stream (stderr, indent);
1085 fprintf (stderr, "object: %lu\n", dispatch->object);
1086 indent_stream (stderr, indent);
1087 fprintf (stderr, "signal: %lu\n", dispatch->signal);
1088 indent_stream (stderr, indent);
1089 fprintf (stderr, "private_segment_size: %u\n",
1090 dispatch->private_segment_size);
1091 indent_stream (stderr, indent);
1092 fprintf (stderr, "group_segment_size: %u\n",
1093 dispatch->group_segment_size);
1094 indent_stream (stderr, indent);
1095 fprintf (stderr, "children dispatches: %lu\n",
1096 dispatch->kernel_dispatch_count);
1097 indent_stream (stderr, indent);
1098 fprintf (stderr, "omp_num_threads: %u\n",
1099 dispatch->omp_num_threads);
1100 fprintf (stderr, "\n");
1102 for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++)
1103 print_kernel_dispatch (dispatch->children_dispatches[i], indent + 2);
1106 /* Create kernel dispatch data structure for a KERNEL and all its
1107 dependencies. */
1109 static struct GOMP_hsa_kernel_dispatch *
1110 create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
1112 struct GOMP_hsa_kernel_dispatch *shadow
1113 = create_single_kernel_dispatch (kernel, omp_data_size);
1114 shadow->omp_num_threads = 64;
1115 shadow->debug = 0;
1116 shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0;
1118 /* Create kernel dispatch data structures. We do not allow to have
1119 a kernel dispatch with depth bigger than one. */
1120 for (unsigned i = 0; i < kernel->dependencies_count; i++)
1122 struct kernel_info *dependency
1123 = get_kernel_for_agent (kernel->agent, kernel->dependencies[i]);
1124 shadow->children_dispatches[i]
1125 = create_single_kernel_dispatch (dependency, omp_data_size);
1126 shadow->children_dispatches[i]->queue
1127 = kernel->agent->kernel_dispatch_command_q;
1128 shadow->children_dispatches[i]->omp_level = 1;
1131 return shadow;
1134 /* Do all the work that is necessary before running KERNEL for the first time.
1135 The function assumes the program has been created, finalized and frozen by
1136 create_and_finalize_hsa_program. */
1138 static void
1139 init_kernel (struct kernel_info *kernel)
1141 if (pthread_mutex_lock (&kernel->init_mutex))
1142 GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
1143 if (kernel->initialized)
1145 if (pthread_mutex_unlock (&kernel->init_mutex))
1146 GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1147 "mutex");
1149 return;
1152 /* Precomputed maximum size of OMP data necessary for a kernel from kernel
1153 dispatch operation. */
1154 init_single_kernel (kernel, &kernel->max_omp_data_size);
1156 if (!kernel->initialization_failed)
1157 HSA_DEBUG ("\n");
1159 kernel->initialized = true;
1160 if (pthread_mutex_unlock (&kernel->init_mutex))
1161 GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1162 "mutex");
1165 /* Parse the target attributes INPUT provided by the compiler and return true
1166 if we should run anything all. If INPUT is NULL, fill DEF with default
1167 values, then store INPUT or DEF into *RESULT. */
1169 static bool
1170 parse_target_attributes (void **input,
1171 struct GOMP_kernel_launch_attributes *def,
1172 struct GOMP_kernel_launch_attributes **result)
1174 if (!input)
1175 GOMP_PLUGIN_fatal ("No target arguments provided");
1177 bool attrs_found = false;
1178 while (*input)
1180 uintptr_t id = (uintptr_t) *input;
1181 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) == GOMP_DEVICE_HSA
1182 && ((id & GOMP_TARGET_ARG_ID_MASK)
1183 == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES))
1185 input++;
1186 attrs_found = true;
1187 break;
1190 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
1191 input++;
1192 input++;
1195 if (!attrs_found)
1197 def->ndim = 1;
1198 def->gdims[0] = 1;
1199 def->gdims[1] = 1;
1200 def->gdims[2] = 1;
1201 def->wdims[0] = 1;
1202 def->wdims[1] = 1;
1203 def->wdims[2] = 1;
1204 *result = def;
1205 HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n");
1206 return true;
1209 struct GOMP_kernel_launch_attributes *kla;
1210 kla = (struct GOMP_kernel_launch_attributes *) *input;
1211 *result = kla;
1212 if (kla->ndim != 1)
1213 GOMP_PLUGIN_fatal ("HSA does not yet support number of dimensions "
1214 "different from one.");
1215 if (kla->gdims[0] == 0)
1216 return false;
1218 HSA_DEBUG ("GOMP_OFFLOAD_run called with grid size %u and group size %u\n",
1219 kla->gdims[0], kla->wdims[0]);
1221 return true;
1224 /* Return true if the HSA runtime can run function FN_PTR. */
1226 bool
1227 GOMP_OFFLOAD_can_run (void *fn_ptr)
1229 struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1230 struct agent_info *agent = kernel->agent;
1231 create_and_finalize_hsa_program (agent);
1233 if (agent->prog_finalized_error)
1234 goto failure;
1236 init_kernel (kernel);
1237 if (kernel->initialization_failed)
1238 goto failure;
1240 return true;
1242 failure:
1243 if (suppress_host_fallback)
1244 GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed");
1245 HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n");
1246 return false;
1249 /* Atomically store pair of uint16_t values (HEADER and REST) to a PACKET. */
1251 void
1252 packet_store_release (uint32_t* packet, uint16_t header, uint16_t rest)
1254 __atomic_store_n (packet, header | (rest << 16), __ATOMIC_RELEASE);
1257 /* Part of the libgomp plugin interface. Run a kernel on device N and pass it
1258 an array of pointers in VARS as a parameter. The kernel is identified by
1259 FN_PTR which must point to a kernel_info structure. */
1261 void
1262 GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
1264 struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1265 struct agent_info *agent = kernel->agent;
1266 struct GOMP_kernel_launch_attributes def;
1267 struct GOMP_kernel_launch_attributes *kla;
1268 if (!parse_target_attributes (args, &def, &kla))
1270 HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
1271 return;
1273 if (pthread_rwlock_rdlock (&agent->modules_rwlock))
1274 GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
1276 if (!agent->initialized)
1277 GOMP_PLUGIN_fatal ("Agent must be initialized");
1279 if (!kernel->initialized)
1280 GOMP_PLUGIN_fatal ("Called kernel must be initialized");
1282 struct GOMP_hsa_kernel_dispatch *shadow
1283 = create_kernel_dispatch (kernel, kernel->max_omp_data_size);
1285 if (debug)
1287 fprintf (stderr, "\nKernel has following dependencies:\n");
1288 print_kernel_dispatch (shadow, 2);
1291 uint64_t index = hsa_queue_add_write_index_release (agent->command_q, 1);
1292 HSA_DEBUG ("Got AQL index %llu\n", (long long int) index);
1294 /* Wait until the queue is not full before writing the packet. */
1295 while (index - hsa_queue_load_read_index_acquire (agent->command_q)
1296 >= agent->command_q->size)
1299 hsa_kernel_dispatch_packet_t *packet;
1300 packet = ((hsa_kernel_dispatch_packet_t *) agent->command_q->base_address)
1301 + index % agent->command_q->size;
1303 memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4);
1304 packet->grid_size_x = kla->gdims[0];
1305 uint32_t wgs = kla->wdims[0];
1306 if (wgs == 0)
1307 /* TODO: Provide a default via environment. */
1308 wgs = 64;
1309 else if (wgs > kla->gdims[0])
1310 wgs = kla->gdims[0];
1311 packet->workgroup_size_x = wgs;
1312 packet->grid_size_y = 1;
1313 packet->workgroup_size_y = 1;
1314 packet->grid_size_z = 1;
1315 packet->workgroup_size_z = 1;
1316 packet->private_segment_size = kernel->private_segment_size;
1317 packet->group_segment_size = kernel->group_segment_size;
1318 packet->kernel_object = kernel->object;
1319 packet->kernarg_address = shadow->kernarg_address;
1320 hsa_signal_t s;
1321 s.handle = shadow->signal;
1322 packet->completion_signal = s;
1323 hsa_signal_store_relaxed (s, 1);
1324 memcpy (shadow->kernarg_address, &vars, sizeof (vars));
1326 /* PR hsa/70337. */
1327 size_t vars_size = sizeof (vars);
1328 if (kernel->kernarg_segment_size > vars_size)
1330 if (kernel->kernarg_segment_size != vars_size
1331 + sizeof (struct hsa_kernel_runtime *))
1332 GOMP_PLUGIN_fatal ("Kernel segment size has an unexpected value");
1333 memcpy (packet->kernarg_address + vars_size, &shadow,
1334 sizeof (struct hsa_kernel_runtime *));
1337 HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n");
1339 uint16_t header;
1340 header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
1341 header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
1342 header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
1344 HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name);
1346 packet_store_release ((uint32_t *) packet, header,
1347 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS);
1349 hsa_signal_store_release (agent->command_q->doorbell_signal, index);
1351 /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for
1352 signal wait and signal load operations on their own and we need to
1353 periodically call the hsa_signal_load_acquire on completion signals of
1354 children kernels in the CPU to make that happen. As soon the
1355 limitation will be resolved, this workaround can be removed. */
1357 HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
1359 /* Root signal waits with 1ms timeout. */
1360 while (hsa_signal_wait_acquire (s, HSA_SIGNAL_CONDITION_LT, 1, 1000 * 1000,
1361 HSA_WAIT_STATE_BLOCKED) != 0)
1362 for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
1364 hsa_signal_t child_s;
1365 child_s.handle = shadow->children_dispatches[i]->signal;
1367 HSA_DEBUG ("Waiting for children completion signal: %lu\n",
1368 shadow->children_dispatches[i]->signal);
1369 hsa_signal_load_acquire (child_s);
1372 release_kernel_dispatch (shadow);
1374 if (pthread_rwlock_unlock (&agent->modules_rwlock))
1375 GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
1378 /* Information to be passed to a thread running a kernel asycnronously. */
1380 struct async_run_info
1382 int device;
1383 void *tgt_fn;
1384 void *tgt_vars;
1385 void **args;
1386 void *async_data;
1389 /* Thread routine to run a kernel asynchronously. */
1391 static void *
1392 run_kernel_asynchronously (void *thread_arg)
1394 struct async_run_info *info = (struct async_run_info *) thread_arg;
1395 int device = info->device;
1396 void *tgt_fn = info->tgt_fn;
1397 void *tgt_vars = info->tgt_vars;
1398 void **args = info->args;
1399 void *async_data = info->async_data;
1401 free (info);
1402 GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
1403 GOMP_PLUGIN_target_task_completion (async_data);
1404 return NULL;
1407 /* Part of the libgomp plugin interface. Run a kernel like GOMP_OFFLOAD_run
1408 does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
1409 has finished. */
1411 void
1412 GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
1413 void **args, void *async_data)
1415 pthread_t pt;
1416 struct async_run_info *info;
1417 HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n")
1418 info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
1420 info->device = device;
1421 info->tgt_fn = tgt_fn;
1422 info->tgt_vars = tgt_vars;
1423 info->args = args;
1424 info->async_data = async_data;
1426 int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info);
1427 if (err != 0)
1428 GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
1429 strerror (err));
1430 err = pthread_detach (pt);
1431 if (err != 0)
1432 GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
1433 "asynchronously: %s", strerror (err));
1436 /* Deinitialize all information associated with MODULE and kernels within
1437 it. Return TRUE on success. */
1439 static bool
1440 destroy_module (struct module_info *module)
1442 int i;
1443 for (i = 0; i < module->kernel_count; i++)
1444 if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
1446 GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization "
1447 "mutex");
1448 return false;
1450 return true;
1453 /* Part of the libgomp plugin interface. Unload BRIG module described by
1454 struct brig_image_desc in TARGET_DATA from agent number N. Return
1455 TRUE on success. */
1457 bool
1458 GOMP_OFFLOAD_unload_image (int n, unsigned version, void *target_data)
1460 if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
1462 GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
1463 " (expected %u, received %u)",
1464 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
1465 return false;
1468 struct agent_info *agent;
1469 agent = get_agent_info (n);
1470 if (!agent)
1471 return false;
1473 if (pthread_rwlock_wrlock (&agent->modules_rwlock))
1475 GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
1476 return false;
1478 struct module_info *module = agent->first_module;
1479 while (module)
1481 if (module->image_desc == target_data)
1482 break;
1483 module = module->next;
1485 if (!module)
1487 GOMP_PLUGIN_error ("Attempt to unload an image that has never been "
1488 "loaded before");
1489 return false;
1492 remove_module_from_agent (agent, module);
1493 if (!destroy_module (module))
1494 return false;
1495 free (module);
1496 if (!destroy_hsa_program (agent))
1497 return false;
1498 if (pthread_rwlock_unlock (&agent->modules_rwlock))
1500 GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
1501 return false;
1503 return true;
1506 /* Part of the libgomp plugin interface. Deinitialize all information and
1507 status associated with agent number N. We do not attempt any
1508 synchronization, assuming the user and libgomp will not attempt
1509 deinitialization of a device that is in any way being used at the same
1510 time. Return TRUE on success. */
1512 bool
1513 GOMP_OFFLOAD_fini_device (int n)
1515 struct agent_info *agent = get_agent_info (n);
1516 if (!agent)
1517 return false;
1519 if (!agent->initialized)
1520 return true;
1522 struct module_info *next_module = agent->first_module;
1523 while (next_module)
1525 struct module_info *module = next_module;
1526 next_module = module->next;
1527 if (!destroy_module (module))
1528 return false;
1529 free (module);
1531 agent->first_module = NULL;
1532 if (!destroy_hsa_program (agent))
1533 return false;
1535 release_agent_shared_libraries (agent);
1537 hsa_status_t status = hsa_queue_destroy (agent->command_q);
1538 if (status != HSA_STATUS_SUCCESS)
1539 return hsa_error ("Error destroying command queue", status);
1540 status = hsa_queue_destroy (agent->kernel_dispatch_command_q);
1541 if (status != HSA_STATUS_SUCCESS)
1542 return hsa_error ("Error destroying kernel dispatch command queue", status);
1543 if (pthread_mutex_destroy (&agent->prog_mutex))
1545 GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex");
1546 return false;
1548 if (pthread_rwlock_destroy (&agent->modules_rwlock))
1550 GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock");
1551 return false;
1553 agent->initialized = false;
1554 return true;
1557 /* Part of the libgomp plugin interface. Not implemented as it is not required
1558 for HSA. */
1560 void *
1561 GOMP_OFFLOAD_alloc (int ord, size_t size)
1563 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because "
1564 "it should never be called");
1565 return NULL;
1568 /* Part of the libgomp plugin interface. Not implemented as it is not required
1569 for HSA. */
1571 bool
1572 GOMP_OFFLOAD_free (int ord, void *ptr)
1574 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because "
1575 "it should never be called");
1576 return false;
1579 /* Part of the libgomp plugin interface. Not implemented as it is not required
1580 for HSA. */
1582 bool
1583 GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
1585 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
1586 "it should never be called");
1587 return false;
1590 /* Part of the libgomp plugin interface. Not implemented as it is not required
1591 for HSA. */
1593 bool
1594 GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
1596 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
1597 "it should never be called");
1598 return false;
1601 /* Part of the libgomp plugin interface. Not implemented as it is not required
1602 for HSA. */
1604 bool
1605 GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
1607 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
1608 "it should never be called");
1609 return false;