Daily bump.
[official-gcc.git] / libgomp / plugin / plugin-hsa.c
blob90ca24719d087d731f591e1653d2fcc27b8fc9bd
1 /* Plugin for HSAIL execution.
3 Copyright (C) 2013-2017 Free Software Foundation, Inc.
5 Contributed by Martin Jambor <mjambor@suse.cz> and
6 Martin Liska <mliska@suse.cz>.
8 This file is part of the GNU Offloading and Multi Processing Library
9 (libgomp).
11 Libgomp is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 3, or (at your option)
14 any later version.
16 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
17 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 more details.
21 Under Section 7 of GPL version 3, you are granted additional
22 permissions described in the GCC Runtime Library Exception, version
23 3.1, as published by the Free Software Foundation.
25 You should have received a copy of the GNU General Public License and
26 a copy of the GCC Runtime Library Exception along with this program;
27 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
28 <http://www.gnu.org/licenses/>. */
30 #include "config.h"
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <pthread.h>
35 #include <inttypes.h>
36 #include <stdbool.h>
37 #include <hsa.h>
38 #include <plugin/hsa_ext_finalize.h>
39 #include <dlfcn.h>
40 #include "libgomp-plugin.h"
41 #include "gomp-constants.h"
43 /* Secure getenv() which returns NULL if running as SUID/SGID. */
44 #ifndef HAVE_SECURE_GETENV
45 #ifdef HAVE___SECURE_GETENV
46 #define secure_getenv __secure_getenv
47 #elif defined (HAVE_UNISTD_H) && defined(HAVE_GETUID) && defined(HAVE_GETEUID) \
48 && defined(HAVE_GETGID) && defined(HAVE_GETEGID)
50 #include <unistd.h>
52 /* Implementation of secure_getenv() for targets where it is not provided but
53 we have at least means to test real and effective IDs. */
55 static char *
56 secure_getenv (const char *name)
58 if ((getuid () == geteuid ()) && (getgid () == getegid ()))
59 return getenv (name);
60 else
61 return NULL;
64 #else
65 #define secure_getenv getenv
66 #endif
67 #endif
69 /* As an HSA runtime is dlopened, following structure defines function
70 pointers utilized by the HSA plug-in. */
72 struct hsa_runtime_fn_info
74 /* HSA runtime. */
75 hsa_status_t (*hsa_status_string_fn) (hsa_status_t status,
76 const char **status_string);
77 hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent,
78 hsa_agent_info_t attribute,
79 void *value);
80 hsa_status_t (*hsa_init_fn) (void);
81 hsa_status_t (*hsa_iterate_agents_fn)
82 (hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data);
83 hsa_status_t (*hsa_region_get_info_fn) (hsa_region_t region,
84 hsa_region_info_t attribute,
85 void *value);
86 hsa_status_t (*hsa_queue_create_fn)
87 (hsa_agent_t agent, uint32_t size, hsa_queue_type_t type,
88 void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
89 void *data, uint32_t private_segment_size,
90 uint32_t group_segment_size, hsa_queue_t **queue);
91 hsa_status_t (*hsa_agent_iterate_regions_fn)
92 (hsa_agent_t agent,
93 hsa_status_t (*callback)(hsa_region_t region, void *data), void *data);
94 hsa_status_t (*hsa_executable_destroy_fn) (hsa_executable_t executable);
95 hsa_status_t (*hsa_executable_create_fn)
96 (hsa_profile_t profile, hsa_executable_state_t executable_state,
97 const char *options, hsa_executable_t *executable);
98 hsa_status_t (*hsa_executable_global_variable_define_fn)
99 (hsa_executable_t executable, const char *variable_name, void *address);
100 hsa_status_t (*hsa_executable_load_code_object_fn)
101 (hsa_executable_t executable, hsa_agent_t agent,
102 hsa_code_object_t code_object, const char *options);
103 hsa_status_t (*hsa_executable_freeze_fn)(hsa_executable_t executable,
104 const char *options);
105 hsa_status_t (*hsa_signal_create_fn) (hsa_signal_value_t initial_value,
106 uint32_t num_consumers,
107 const hsa_agent_t *consumers,
108 hsa_signal_t *signal);
109 hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size,
110 void **ptr);
111 hsa_status_t (*hsa_memory_free_fn) (void *ptr);
112 hsa_status_t (*hsa_signal_destroy_fn) (hsa_signal_t signal);
113 hsa_status_t (*hsa_executable_get_symbol_fn)
114 (hsa_executable_t executable, const char *module_name,
115 const char *symbol_name, hsa_agent_t agent, int32_t call_convention,
116 hsa_executable_symbol_t *symbol);
117 hsa_status_t (*hsa_executable_symbol_get_info_fn)
118 (hsa_executable_symbol_t executable_symbol,
119 hsa_executable_symbol_info_t attribute, void *value);
120 uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue,
121 uint64_t value);
122 uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue);
123 void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal,
124 hsa_signal_value_t value);
125 void (*hsa_signal_store_release_fn) (hsa_signal_t signal,
126 hsa_signal_value_t value);
127 hsa_signal_value_t (*hsa_signal_wait_acquire_fn)
128 (hsa_signal_t signal, hsa_signal_condition_t condition,
129 hsa_signal_value_t compare_value, uint64_t timeout_hint,
130 hsa_wait_state_t wait_state_hint);
131 hsa_signal_value_t (*hsa_signal_load_acquire_fn) (hsa_signal_t signal);
132 hsa_status_t (*hsa_queue_destroy_fn) (hsa_queue_t *queue);
134 /* HSA finalizer. */
135 hsa_status_t (*hsa_ext_program_add_module_fn) (hsa_ext_program_t program,
136 hsa_ext_module_t module);
137 hsa_status_t (*hsa_ext_program_create_fn)
138 (hsa_machine_model_t machine_model, hsa_profile_t profile,
139 hsa_default_float_rounding_mode_t default_float_rounding_mode,
140 const char *options, hsa_ext_program_t *program);
141 hsa_status_t (*hsa_ext_program_destroy_fn) (hsa_ext_program_t program);
142 hsa_status_t (*hsa_ext_program_finalize_fn)
143 (hsa_ext_program_t program,hsa_isa_t isa,
144 int32_t call_convention, hsa_ext_control_directives_t control_directives,
145 const char *options, hsa_code_object_type_t code_object_type,
146 hsa_code_object_t *code_object);
149 /* HSA runtime functions that are initialized in init_hsa_context. */
151 static struct hsa_runtime_fn_info hsa_fns;
153 /* Keep the following GOMP prefixed structures in sync with respective parts of
154 the compiler. */
156 /* Structure describing the run-time and grid properties of an HSA kernel
157 lauch. */
159 struct GOMP_kernel_launch_attributes
161 /* Number of dimensions the workload has. Maximum number is 3. */
162 uint32_t ndim;
163 /* Size of the grid in the three respective dimensions. */
164 uint32_t gdims[3];
165 /* Size of work-groups in the respective dimensions. */
166 uint32_t wdims[3];
169 /* Collection of information needed for a dispatch of a kernel from a
170 kernel. */
172 struct GOMP_hsa_kernel_dispatch
174 /* Pointer to a command queue associated with a kernel dispatch agent. */
175 void *queue;
176 /* Pointer to reserved memory for OMP data struct copying. */
177 void *omp_data_memory;
178 /* Pointer to a memory space used for kernel arguments passing. */
179 void *kernarg_address;
180 /* Kernel object. */
181 uint64_t object;
182 /* Synchronization signal used for dispatch synchronization. */
183 uint64_t signal;
184 /* Private segment size. */
185 uint32_t private_segment_size;
186 /* Group segment size. */
187 uint32_t group_segment_size;
188 /* Number of children kernel dispatches. */
189 uint64_t kernel_dispatch_count;
190 /* Debug purpose argument. */
191 uint64_t debug;
192 /* Levels-var ICV. */
193 uint64_t omp_level;
194 /* Kernel dispatch structures created for children kernel dispatches. */
195 struct GOMP_hsa_kernel_dispatch **children_dispatches;
196 /* Number of threads. */
197 uint32_t omp_num_threads;
200 /* Part of the libgomp plugin interface. Return the name of the accelerator,
201 which is "hsa". */
203 const char *
204 GOMP_OFFLOAD_get_name (void)
206 return "hsa";
209 /* Part of the libgomp plugin interface. Return the specific capabilities the
210 HSA accelerator have. */
212 unsigned int
213 GOMP_OFFLOAD_get_caps (void)
215 return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400;
218 /* Part of the libgomp plugin interface. Identify as HSA accelerator. */
221 GOMP_OFFLOAD_get_type (void)
223 return OFFLOAD_TARGET_TYPE_HSA;
226 /* Return the libgomp version number we're compatible with. There is
227 no requirement for cross-version compatibility. */
229 unsigned
230 GOMP_OFFLOAD_version (void)
232 return GOMP_VERSION;
235 /* Flag to decide whether print to stderr information about what is going on.
236 Set in init_debug depending on environment variables. */
238 static bool debug;
240 /* Flag to decide if the runtime should suppress a possible fallback to host
241 execution. */
243 static bool suppress_host_fallback;
245 /* Flag to locate HSA runtime shared library that is dlopened
246 by this plug-in. */
248 static const char *hsa_runtime_lib;
250 /* Flag to decide if the runtime should support also CPU devices (can be
251 a simulator). */
253 static bool support_cpu_devices;
255 /* Initialize debug and suppress_host_fallback according to the environment. */
257 static void
258 init_enviroment_variables (void)
260 if (secure_getenv ("HSA_DEBUG"))
261 debug = true;
262 else
263 debug = false;
265 if (secure_getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
266 suppress_host_fallback = true;
267 else
268 suppress_host_fallback = false;
270 hsa_runtime_lib = secure_getenv ("HSA_RUNTIME_LIB");
271 if (hsa_runtime_lib == NULL)
272 hsa_runtime_lib = HSA_RUNTIME_LIB "libhsa-runtime64.so";
274 support_cpu_devices = secure_getenv ("HSA_SUPPORT_CPU_DEVICES");
277 /* Print a logging message with PREFIX to stderr if HSA_DEBUG value
278 is set to true. */
280 #define HSA_LOG(prefix, ...) \
281 do \
283 if (debug) \
285 fprintf (stderr, prefix); \
286 fprintf (stderr, __VA_ARGS__); \
289 while (false);
291 /* Print a debugging message to stderr. */
293 #define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__)
295 /* Print a warning message to stderr. */
297 #define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__)
299 /* Print HSA warning STR with an HSA STATUS code. */
301 static void
302 hsa_warn (const char *str, hsa_status_t status)
304 if (!debug)
305 return;
307 const char *hsa_error_msg;
308 hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
310 fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg);
313 /* Report a fatal error STR together with the HSA error corresponding to STATUS
314 and terminate execution of the current process. */
316 static void
317 hsa_fatal (const char *str, hsa_status_t status)
319 const char *hsa_error_msg;
320 hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
321 GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
322 hsa_error_msg);
325 /* Like hsa_fatal, except only report error message, and return FALSE
326 for propagating error processing to outside of plugin. */
328 static bool
329 hsa_error (const char *str, hsa_status_t status)
331 const char *hsa_error_msg;
332 hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
333 GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str,
334 hsa_error_msg);
335 return false;
338 struct hsa_kernel_description
340 const char *name;
341 unsigned omp_data_size;
342 bool gridified_kernel_p;
343 unsigned kernel_dependencies_count;
344 const char **kernel_dependencies;
347 struct global_var_info
349 const char *name;
350 void *address;
353 /* Data passed by the static initializer of a compilation unit containing BRIG
354 to GOMP_offload_register. */
356 struct brig_image_desc
358 hsa_ext_module_t brig_module;
359 const unsigned kernel_count;
360 struct hsa_kernel_description *kernel_infos;
361 const unsigned global_variable_count;
362 struct global_var_info *global_variables;
365 struct agent_info;
367 /* Information required to identify, finalize and run any given kernel. */
369 struct kernel_info
371 /* Name of the kernel, required to locate it within the brig module. */
372 const char *name;
373 /* Size of memory space for OMP data. */
374 unsigned omp_data_size;
375 /* The specific agent the kernel has been or will be finalized for and run
376 on. */
377 struct agent_info *agent;
378 /* The specific module where the kernel takes place. */
379 struct module_info *module;
380 /* Mutex enforcing that at most once thread ever initializes a kernel for
381 use. A thread should have locked agent->modules_rwlock for reading before
382 acquiring it. */
383 pthread_mutex_t init_mutex;
384 /* Flag indicating whether the kernel has been initialized and all fields
385 below it contain valid data. */
386 bool initialized;
387 /* Flag indicating that the kernel has a problem that blocks an execution. */
388 bool initialization_failed;
389 /* The object to be put into the dispatch queue. */
390 uint64_t object;
391 /* Required size of kernel arguments. */
392 uint32_t kernarg_segment_size;
393 /* Required size of group segment. */
394 uint32_t group_segment_size;
395 /* Required size of private segment. */
396 uint32_t private_segment_size;
397 /* List of all kernel dependencies. */
398 const char **dependencies;
399 /* Number of dependencies. */
400 unsigned dependencies_count;
401 /* Maximum OMP data size necessary for kernel from kernel dispatches. */
402 unsigned max_omp_data_size;
403 /* True if the kernel is gridified. */
404 bool gridified_kernel_p;
407 /* Information about a particular brig module, its image and kernels. */
409 struct module_info
411 /* The next and previous module in the linked list of modules of an agent. */
412 struct module_info *next, *prev;
413 /* The description with which the program has registered the image. */
414 struct brig_image_desc *image_desc;
416 /* Number of kernels in this module. */
417 int kernel_count;
418 /* An array of kernel_info structures describing each kernel in this
419 module. */
420 struct kernel_info kernels[];
423 /* Information about shared brig library. */
425 struct brig_library_info
427 char *file_name;
428 hsa_ext_module_t image;
431 /* Description of an HSA GPU agent and the program associated with it. */
433 struct agent_info
435 /* The HSA ID of the agent. Assigned when hsa_context is initialized. */
436 hsa_agent_t id;
437 /* Whether the agent has been initialized. The fields below are usable only
438 if it has been. */
439 bool initialized;
440 /* The HSA ISA of this agent. */
441 hsa_isa_t isa;
442 /* Command queue of the agent. */
443 hsa_queue_t *command_q;
444 /* Kernel from kernel dispatch command queue. */
445 hsa_queue_t *kernel_dispatch_command_q;
446 /* The HSA memory region from which to allocate kernel arguments. */
447 hsa_region_t kernarg_region;
449 /* Read-write lock that protects kernels which are running or about to be run
450 from interference with loading and unloading of images. Needs to be
451 locked for reading while a kernel is being run, and for writing if the
452 list of modules is manipulated (and thus the HSA program invalidated). */
453 pthread_rwlock_t modules_rwlock;
454 /* The first module in a linked list of modules associated with this
455 kernel. */
456 struct module_info *first_module;
458 /* Mutex enforcing that only one thread will finalize the HSA program. A
459 thread should have locked agent->modules_rwlock for reading before
460 acquiring it. */
461 pthread_mutex_t prog_mutex;
462 /* Flag whether the HSA program that consists of all the modules has been
463 finalized. */
464 bool prog_finalized;
465 /* Flag whether the program was finalized but with a failure. */
466 bool prog_finalized_error;
467 /* HSA executable - the finalized program that is used to locate kernels. */
468 hsa_executable_t executable;
469 /* List of BRIG libraries. */
470 struct brig_library_info **brig_libraries;
471 /* Number of loaded shared BRIG libraries. */
472 unsigned brig_libraries_count;
475 /* Information about the whole HSA environment and all of its agents. */
477 struct hsa_context_info
479 /* Whether the structure has been initialized. */
480 bool initialized;
481 /* Number of usable GPU HSA agents in the system. */
482 int agent_count;
483 /* Array of agent_info structures describing the individual HSA agents. */
484 struct agent_info *agents;
487 /* Information about the whole HSA environment and all of its agents. */
489 static struct hsa_context_info hsa_context;
491 #define DLSYM_FN(function) \
492 hsa_fns.function##_fn = dlsym (handle, #function); \
493 if (hsa_fns.function##_fn == NULL) \
494 goto dl_fail;
496 static bool
497 init_hsa_runtime_functions (void)
499 void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY);
500 if (handle == NULL)
501 goto dl_fail;
503 DLSYM_FN (hsa_status_string)
504 DLSYM_FN (hsa_agent_get_info)
505 DLSYM_FN (hsa_init)
506 DLSYM_FN (hsa_iterate_agents)
507 DLSYM_FN (hsa_region_get_info)
508 DLSYM_FN (hsa_queue_create)
509 DLSYM_FN (hsa_agent_iterate_regions)
510 DLSYM_FN (hsa_executable_destroy)
511 DLSYM_FN (hsa_executable_create)
512 DLSYM_FN (hsa_executable_global_variable_define)
513 DLSYM_FN (hsa_executable_load_code_object)
514 DLSYM_FN (hsa_executable_freeze)
515 DLSYM_FN (hsa_signal_create)
516 DLSYM_FN (hsa_memory_allocate)
517 DLSYM_FN (hsa_memory_free)
518 DLSYM_FN (hsa_signal_destroy)
519 DLSYM_FN (hsa_executable_get_symbol)
520 DLSYM_FN (hsa_executable_symbol_get_info)
521 DLSYM_FN (hsa_queue_add_write_index_release)
522 DLSYM_FN (hsa_queue_load_read_index_acquire)
523 DLSYM_FN (hsa_signal_wait_acquire)
524 DLSYM_FN (hsa_signal_store_relaxed)
525 DLSYM_FN (hsa_signal_store_release)
526 DLSYM_FN (hsa_signal_load_acquire)
527 DLSYM_FN (hsa_queue_destroy)
528 DLSYM_FN (hsa_ext_program_add_module)
529 DLSYM_FN (hsa_ext_program_create)
530 DLSYM_FN (hsa_ext_program_destroy)
531 DLSYM_FN (hsa_ext_program_finalize)
532 return true;
534 dl_fail:
535 HSA_DEBUG ("while loading %s: %s\n", hsa_runtime_lib, dlerror ());
536 return false;
539 /* Find kernel for an AGENT by name provided in KERNEL_NAME. */
541 static struct kernel_info *
542 get_kernel_for_agent (struct agent_info *agent, const char *kernel_name)
544 struct module_info *module = agent->first_module;
546 while (module)
548 for (unsigned i = 0; i < module->kernel_count; i++)
549 if (strcmp (module->kernels[i].name, kernel_name) == 0)
550 return &module->kernels[i];
552 module = module->next;
555 return NULL;
558 /* Return true if the agent is a GPU and acceptable of concurrent submissions
559 from different threads. */
561 static bool
562 suitable_hsa_agent_p (hsa_agent_t agent)
564 hsa_device_type_t device_type;
565 hsa_status_t status
566 = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_DEVICE,
567 &device_type);
568 if (status != HSA_STATUS_SUCCESS)
569 return false;
571 switch (device_type)
573 case HSA_DEVICE_TYPE_GPU:
574 break;
575 case HSA_DEVICE_TYPE_CPU:
576 if (!support_cpu_devices)
577 return false;
578 break;
579 default:
580 return false;
583 uint32_t features = 0;
584 status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_FEATURE,
585 &features);
586 if (status != HSA_STATUS_SUCCESS
587 || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH))
588 return false;
589 hsa_queue_type_t queue_type;
590 status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_QUEUE_TYPE,
591 &queue_type);
592 if (status != HSA_STATUS_SUCCESS
593 || (queue_type != HSA_QUEUE_TYPE_MULTI))
594 return false;
596 return true;
599 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment
600 agent_count in hsa_context. */
602 static hsa_status_t
603 count_gpu_agents (hsa_agent_t agent, void *data __attribute__ ((unused)))
605 if (suitable_hsa_agent_p (agent))
606 hsa_context.agent_count++;
607 return HSA_STATUS_SUCCESS;
610 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent
611 id to the describing structure in the hsa context. The index of the
612 structure is pointed to by DATA, increment it afterwards. */
614 static hsa_status_t
615 assign_agent_ids (hsa_agent_t agent, void *data)
617 if (suitable_hsa_agent_p (agent))
619 int *agent_index = (int *) data;
620 hsa_context.agents[*agent_index].id = agent;
621 ++*agent_index;
623 return HSA_STATUS_SUCCESS;
626 /* Initialize hsa_context if it has not already been done.
627 Return TRUE on success. */
629 static bool
630 init_hsa_context (void)
632 hsa_status_t status;
633 int agent_index = 0;
635 if (hsa_context.initialized)
636 return true;
637 init_enviroment_variables ();
638 if (!init_hsa_runtime_functions ())
640 HSA_DEBUG ("Run-time could not be dynamically opened\n");
641 return false;
643 status = hsa_fns.hsa_init_fn ();
644 if (status != HSA_STATUS_SUCCESS)
645 return hsa_error ("Run-time could not be initialized", status);
646 HSA_DEBUG ("HSA run-time initialized\n");
647 status = hsa_fns.hsa_iterate_agents_fn (count_gpu_agents, NULL);
648 if (status != HSA_STATUS_SUCCESS)
649 return hsa_error ("HSA GPU devices could not be enumerated", status);
650 HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
652 hsa_context.agents
653 = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
654 * sizeof (struct agent_info));
655 status = hsa_fns.hsa_iterate_agents_fn (assign_agent_ids, &agent_index);
656 if (agent_index != hsa_context.agent_count)
658 GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents");
659 return false;
661 hsa_context.initialized = true;
662 return true;
665 /* Callback of dispatch queues to report errors. */
667 static void
668 queue_callback (hsa_status_t status,
669 hsa_queue_t *queue __attribute__ ((unused)),
670 void *data __attribute__ ((unused)))
672 hsa_fatal ("Asynchronous queue error", status);
675 /* Callback of hsa_agent_iterate_regions. Determine if a memory REGION can be
676 used for kernarg allocations and if so write it to the memory pointed to by
677 DATA and break the query. */
679 static hsa_status_t
680 get_kernarg_memory_region (hsa_region_t region, void *data)
682 hsa_status_t status;
683 hsa_region_segment_t segment;
685 status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SEGMENT,
686 &segment);
687 if (status != HSA_STATUS_SUCCESS)
688 return status;
689 if (segment != HSA_REGION_SEGMENT_GLOBAL)
690 return HSA_STATUS_SUCCESS;
692 uint32_t flags;
693 status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS,
694 &flags);
695 if (status != HSA_STATUS_SUCCESS)
696 return status;
697 if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
699 hsa_region_t *ret = (hsa_region_t *) data;
700 *ret = region;
701 return HSA_STATUS_INFO_BREAK;
703 return HSA_STATUS_SUCCESS;
706 /* Part of the libgomp plugin interface. Return the number of HSA devices on
707 the system. */
710 GOMP_OFFLOAD_get_num_devices (void)
712 if (!init_hsa_context ())
713 return 0;
714 return hsa_context.agent_count;
717 /* Part of the libgomp plugin interface. Initialize agent number N so that it
718 can be used for computation. Return TRUE on success. */
720 bool
721 GOMP_OFFLOAD_init_device (int n)
723 if (!init_hsa_context ())
724 return false;
725 if (n >= hsa_context.agent_count)
727 GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n);
728 return false;
730 struct agent_info *agent = &hsa_context.agents[n];
732 if (agent->initialized)
733 return true;
735 if (pthread_rwlock_init (&agent->modules_rwlock, NULL))
737 GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock");
738 return false;
740 if (pthread_mutex_init (&agent->prog_mutex, NULL))
742 GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex");
743 return false;
746 uint32_t queue_size;
747 hsa_status_t status;
748 status = hsa_fns.hsa_agent_get_info_fn (agent->id,
749 HSA_AGENT_INFO_QUEUE_MAX_SIZE,
750 &queue_size);
751 if (status != HSA_STATUS_SUCCESS)
752 return hsa_error ("Error requesting maximum queue size of the HSA agent",
753 status);
754 status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_ISA,
755 &agent->isa);
756 if (status != HSA_STATUS_SUCCESS)
757 return hsa_error ("Error querying the ISA of the agent", status);
758 status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
759 HSA_QUEUE_TYPE_MULTI,
760 queue_callback, NULL, UINT32_MAX,
761 UINT32_MAX,
762 &agent->command_q);
763 if (status != HSA_STATUS_SUCCESS)
764 return hsa_error ("Error creating command queue", status);
766 status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
767 HSA_QUEUE_TYPE_MULTI,
768 queue_callback, NULL, UINT32_MAX,
769 UINT32_MAX,
770 &agent->kernel_dispatch_command_q);
771 if (status != HSA_STATUS_SUCCESS)
772 return hsa_error ("Error creating kernel dispatch command queue", status);
774 agent->kernarg_region.handle = (uint64_t) -1;
775 status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id,
776 get_kernarg_memory_region,
777 &agent->kernarg_region);
778 if (agent->kernarg_region.handle == (uint64_t) -1)
780 GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
781 "arguments");
782 return false;
784 HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
785 (long long unsigned) agent->command_q->id);
786 HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
787 (long long unsigned) agent->kernel_dispatch_command_q->id);
788 agent->initialized = true;
789 return true;
792 /* Verify that hsa_context has already been initialized and return the
793 agent_info structure describing device number N. Return NULL on error. */
795 static struct agent_info *
796 get_agent_info (int n)
798 if (!hsa_context.initialized)
800 GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context.");
801 return NULL;
803 if (n >= hsa_context.agent_count)
805 GOMP_PLUGIN_error ("Request to operate on anon-existing HSA device %i", n);
806 return NULL;
808 if (!hsa_context.agents[n].initialized)
810 GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent.");
811 return NULL;
813 return &hsa_context.agents[n];
816 /* Insert MODULE to the linked list of modules of AGENT. */
818 static void
819 add_module_to_agent (struct agent_info *agent, struct module_info *module)
821 if (agent->first_module)
822 agent->first_module->prev = module;
823 module->next = agent->first_module;
824 module->prev = NULL;
825 agent->first_module = module;
828 /* Remove MODULE from the linked list of modules of AGENT. */
830 static void
831 remove_module_from_agent (struct agent_info *agent, struct module_info *module)
833 if (agent->first_module == module)
834 agent->first_module = module->next;
835 if (module->prev)
836 module->prev->next = module->next;
837 if (module->next)
838 module->next->prev = module->prev;
841 /* Free the HSA program in agent and everything associated with it and set
842 agent->prog_finalized and the initialized flags of all kernels to false.
843 Return TRUE on success. */
845 static bool
846 destroy_hsa_program (struct agent_info *agent)
848 if (!agent->prog_finalized || agent->prog_finalized_error)
849 return true;
851 hsa_status_t status;
853 HSA_DEBUG ("Destroying the current HSA program.\n");
855 status = hsa_fns.hsa_executable_destroy_fn (agent->executable);
856 if (status != HSA_STATUS_SUCCESS)
857 return hsa_error ("Could not destroy HSA executable", status);
859 struct module_info *module;
860 for (module = agent->first_module; module; module = module->next)
862 int i;
863 for (i = 0; i < module->kernel_count; i++)
864 module->kernels[i].initialized = false;
866 agent->prog_finalized = false;
867 return true;
870 /* Initialize KERNEL from D and other parameters. Return true on success. */
872 static bool
873 init_basic_kernel_info (struct kernel_info *kernel,
874 struct hsa_kernel_description *d,
875 struct agent_info *agent,
876 struct module_info *module)
878 kernel->agent = agent;
879 kernel->module = module;
880 kernel->name = d->name;
881 kernel->omp_data_size = d->omp_data_size;
882 kernel->gridified_kernel_p = d->gridified_kernel_p;
883 kernel->dependencies_count = d->kernel_dependencies_count;
884 kernel->dependencies = d->kernel_dependencies;
885 if (pthread_mutex_init (&kernel->init_mutex, NULL))
887 GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
888 return false;
890 return true;
893 /* Part of the libgomp plugin interface. Load BRIG module described by struct
894 brig_image_desc in TARGET_DATA and return references to kernel descriptors
895 in TARGET_TABLE. */
898 GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
899 struct addr_pair **target_table)
901 if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
903 GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
904 " (expected %u, received %u)",
905 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
906 return -1;
909 struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
910 struct agent_info *agent;
911 struct addr_pair *pair;
912 struct module_info *module;
913 struct kernel_info *kernel;
914 int kernel_count = image_desc->kernel_count;
916 agent = get_agent_info (ord);
917 if (!agent)
918 return -1;
920 if (pthread_rwlock_wrlock (&agent->modules_rwlock))
922 GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
923 return -1;
925 if (agent->prog_finalized
926 && !destroy_hsa_program (agent))
927 return -1;
929 HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
930 pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
931 *target_table = pair;
932 module = (struct module_info *)
933 GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info)
934 + kernel_count * sizeof (struct kernel_info));
935 module->image_desc = image_desc;
936 module->kernel_count = kernel_count;
938 kernel = &module->kernels[0];
940 /* Allocate memory for kernel dependencies. */
941 for (unsigned i = 0; i < kernel_count; i++)
943 pair->start = (uintptr_t) kernel;
944 pair->end = (uintptr_t) (kernel + 1);
946 struct hsa_kernel_description *d = &image_desc->kernel_infos[i];
947 if (!init_basic_kernel_info (kernel, d, agent, module))
948 return -1;
949 kernel++;
950 pair++;
953 add_module_to_agent (agent, module);
954 if (pthread_rwlock_unlock (&agent->modules_rwlock))
956 GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
957 return -1;
959 return kernel_count;
962 /* Add a shared BRIG library from a FILE_NAME to an AGENT. */
964 static struct brig_library_info *
965 add_shared_library (const char *file_name, struct agent_info *agent)
967 struct brig_library_info *library = NULL;
969 void *f = dlopen (file_name, RTLD_NOW);
970 void *start = dlsym (f, "__brig_start");
971 void *end = dlsym (f, "__brig_end");
973 if (start == NULL || end == NULL)
974 return NULL;
976 unsigned size = end - start;
977 char *buf = (char *) GOMP_PLUGIN_malloc (size);
978 memcpy (buf, start, size);
980 library = GOMP_PLUGIN_malloc (sizeof (struct agent_info));
981 library->file_name = (char *) GOMP_PLUGIN_malloc
982 ((strlen (file_name) + 1));
983 strcpy (library->file_name, file_name);
984 library->image = (hsa_ext_module_t) buf;
986 return library;
989 /* Release memory used for BRIG shared libraries that correspond
990 to an AGENT. */
992 static void
993 release_agent_shared_libraries (struct agent_info *agent)
995 for (unsigned i = 0; i < agent->brig_libraries_count; i++)
996 if (agent->brig_libraries[i])
998 free (agent->brig_libraries[i]->file_name);
999 free (agent->brig_libraries[i]->image);
1000 free (agent->brig_libraries[i]);
1003 free (agent->brig_libraries);
1006 /* Create and finalize the program consisting of all loaded modules. */
1008 static void
1009 create_and_finalize_hsa_program (struct agent_info *agent)
1011 hsa_status_t status;
1012 hsa_ext_program_t prog_handle;
1013 int mi = 0;
1015 if (pthread_mutex_lock (&agent->prog_mutex))
1016 GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex");
1017 if (agent->prog_finalized)
1018 goto final;
1020 status = hsa_fns.hsa_ext_program_create_fn
1021 (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
1022 HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
1023 NULL, &prog_handle);
1024 if (status != HSA_STATUS_SUCCESS)
1025 hsa_fatal ("Could not create an HSA program", status);
1027 HSA_DEBUG ("Created a finalized program\n");
1029 struct module_info *module = agent->first_module;
1030 while (module)
1032 status = hsa_fns.hsa_ext_program_add_module_fn
1033 (prog_handle, module->image_desc->brig_module);
1034 if (status != HSA_STATUS_SUCCESS)
1035 hsa_fatal ("Could not add a module to the HSA program", status);
1036 module = module->next;
1037 mi++;
1040 /* Load all shared libraries. */
1041 const char *libraries[] = { "libhsamath.so", "libhsastd.so" };
1042 const unsigned libraries_count = sizeof (libraries) / sizeof (const char *);
1044 agent->brig_libraries_count = libraries_count;
1045 agent->brig_libraries = GOMP_PLUGIN_malloc_cleared
1046 (sizeof (struct brig_library_info) * libraries_count);
1048 for (unsigned i = 0; i < libraries_count; i++)
1050 struct brig_library_info *library = add_shared_library (libraries[i],
1051 agent);
1052 if (library == NULL)
1054 HSA_WARNING ("Could not open a shared BRIG library: %s\n",
1055 libraries[i]);
1056 continue;
1059 status = hsa_fns.hsa_ext_program_add_module_fn (prog_handle,
1060 library->image);
1061 if (status != HSA_STATUS_SUCCESS)
1062 hsa_warn ("Could not add a shared BRIG library the HSA program",
1063 status);
1064 else
1065 HSA_DEBUG ("a shared BRIG library has been added to a program: %s\n",
1066 libraries[i]);
1069 hsa_ext_control_directives_t control_directives;
1070 memset (&control_directives, 0, sizeof (control_directives));
1071 hsa_code_object_t code_object;
1072 status = hsa_fns.hsa_ext_program_finalize_fn
1073 (prog_handle, agent->isa,HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
1074 control_directives, "", HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object);
1075 if (status != HSA_STATUS_SUCCESS)
1077 hsa_warn ("Finalization of the HSA program failed", status);
1078 goto failure;
1081 HSA_DEBUG ("Finalization done\n");
1082 hsa_fns.hsa_ext_program_destroy_fn (prog_handle);
1084 status
1085 = hsa_fns.hsa_executable_create_fn (HSA_PROFILE_FULL,
1086 HSA_EXECUTABLE_STATE_UNFROZEN,
1087 "", &agent->executable);
1088 if (status != HSA_STATUS_SUCCESS)
1089 hsa_fatal ("Could not create HSA executable", status);
1091 module = agent->first_module;
1092 while (module)
1094 /* Initialize all global variables declared in the module. */
1095 for (unsigned i = 0; i < module->image_desc->global_variable_count; i++)
1097 struct global_var_info *var;
1098 var = &module->image_desc->global_variables[i];
1099 status = hsa_fns.hsa_executable_global_variable_define_fn
1100 (agent->executable, var->name, var->address);
1102 HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name,
1103 var->address);
1105 if (status != HSA_STATUS_SUCCESS)
1106 hsa_fatal ("Could not define a global variable in the HSA program",
1107 status);
1110 module = module->next;
1113 status = hsa_fns.hsa_executable_load_code_object_fn (agent->executable,
1114 agent->id,
1115 code_object, "");
1116 if (status != HSA_STATUS_SUCCESS)
1117 hsa_fatal ("Could not add a code object to the HSA executable", status);
1118 status = hsa_fns.hsa_executable_freeze_fn (agent->executable, "");
1119 if (status != HSA_STATUS_SUCCESS)
1120 hsa_fatal ("Could not freeze the HSA executable", status);
1122 HSA_DEBUG ("Froze HSA executable with the finalized code object\n");
1124 /* If all goes good, jump to final. */
1125 goto final;
1127 failure:
1128 agent->prog_finalized_error = true;
1130 final:
1131 agent->prog_finalized = true;
1133 if (pthread_mutex_unlock (&agent->prog_mutex))
1134 GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
1137 /* Create kernel dispatch data structure for given KERNEL. */
1139 static struct GOMP_hsa_kernel_dispatch *
1140 create_single_kernel_dispatch (struct kernel_info *kernel,
1141 unsigned omp_data_size)
1143 struct agent_info *agent = kernel->agent;
1144 struct GOMP_hsa_kernel_dispatch *shadow
1145 = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch));
1147 shadow->queue = agent->command_q;
1148 shadow->omp_data_memory
1149 = omp_data_size > 0 ? GOMP_PLUGIN_malloc (omp_data_size) : NULL;
1150 unsigned dispatch_count = kernel->dependencies_count;
1151 shadow->kernel_dispatch_count = dispatch_count;
1153 shadow->children_dispatches
1154 = GOMP_PLUGIN_malloc (dispatch_count * sizeof (shadow));
1156 shadow->object = kernel->object;
1158 hsa_signal_t sync_signal;
1159 hsa_status_t status = hsa_fns.hsa_signal_create_fn (1, 0, NULL, &sync_signal);
1160 if (status != HSA_STATUS_SUCCESS)
1161 hsa_fatal ("Error creating the HSA sync signal", status);
1163 shadow->signal = sync_signal.handle;
1164 shadow->private_segment_size = kernel->private_segment_size;
1165 shadow->group_segment_size = kernel->group_segment_size;
1167 status
1168 = hsa_fns.hsa_memory_allocate_fn (agent->kernarg_region,
1169 kernel->kernarg_segment_size,
1170 &shadow->kernarg_address);
1171 if (status != HSA_STATUS_SUCCESS)
1172 hsa_fatal ("Could not allocate memory for HSA kernel arguments", status);
1174 return shadow;
1177 /* Release data structure created for a kernel dispatch in SHADOW argument. */
1179 static void
1180 release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow)
1182 HSA_DEBUG ("Released kernel dispatch: %p has value: %lu (%p)\n", shadow,
1183 shadow->debug, (void *) shadow->debug);
1185 hsa_fns.hsa_memory_free_fn (shadow->kernarg_address);
1187 hsa_signal_t s;
1188 s.handle = shadow->signal;
1189 hsa_fns.hsa_signal_destroy_fn (s);
1191 free (shadow->omp_data_memory);
1193 for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
1194 release_kernel_dispatch (shadow->children_dispatches[i]);
1196 free (shadow->children_dispatches);
1197 free (shadow);
1200 /* Initialize a KERNEL without its dependencies. MAX_OMP_DATA_SIZE is used
1201 to calculate maximum necessary memory for OMP data allocation. */
1203 static void
1204 init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
1206 hsa_status_t status;
1207 struct agent_info *agent = kernel->agent;
1208 hsa_executable_symbol_t kernel_symbol;
1209 status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
1210 kernel->name, agent->id,
1211 0, &kernel_symbol);
1212 if (status != HSA_STATUS_SUCCESS)
1214 hsa_warn ("Could not find symbol for kernel in the code object", status);
1215 goto failure;
1217 HSA_DEBUG ("Located kernel %s\n", kernel->name);
1218 status = hsa_fns.hsa_executable_symbol_get_info_fn
1219 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel->object);
1220 if (status != HSA_STATUS_SUCCESS)
1221 hsa_fatal ("Could not extract a kernel object from its symbol", status);
1222 status = hsa_fns.hsa_executable_symbol_get_info_fn
1223 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
1224 &kernel->kernarg_segment_size);
1225 if (status != HSA_STATUS_SUCCESS)
1226 hsa_fatal ("Could not get info about kernel argument size", status);
1227 status = hsa_fns.hsa_executable_symbol_get_info_fn
1228 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
1229 &kernel->group_segment_size);
1230 if (status != HSA_STATUS_SUCCESS)
1231 hsa_fatal ("Could not get info about kernel group segment size", status);
1232 status = hsa_fns.hsa_executable_symbol_get_info_fn
1233 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
1234 &kernel->private_segment_size);
1235 if (status != HSA_STATUS_SUCCESS)
1236 hsa_fatal ("Could not get info about kernel private segment size",
1237 status);
1239 HSA_DEBUG ("Kernel structure for %s fully initialized with "
1240 "following segment sizes: \n", kernel->name);
1241 HSA_DEBUG (" group_segment_size: %u\n",
1242 (unsigned) kernel->group_segment_size);
1243 HSA_DEBUG (" private_segment_size: %u\n",
1244 (unsigned) kernel->private_segment_size);
1245 HSA_DEBUG (" kernarg_segment_size: %u\n",
1246 (unsigned) kernel->kernarg_segment_size);
1247 HSA_DEBUG (" omp_data_size: %u\n", kernel->omp_data_size);
1248 HSA_DEBUG (" gridified_kernel_p: %u\n", kernel->gridified_kernel_p);
1250 if (kernel->omp_data_size > *max_omp_data_size)
1251 *max_omp_data_size = kernel->omp_data_size;
1253 for (unsigned i = 0; i < kernel->dependencies_count; i++)
1255 struct kernel_info *dependency
1256 = get_kernel_for_agent (agent, kernel->dependencies[i]);
1258 if (dependency == NULL)
1260 HSA_DEBUG ("Could not find a dependency for a kernel: %s, "
1261 "dependency name: %s\n", kernel->name,
1262 kernel->dependencies[i]);
1263 goto failure;
1266 if (dependency->dependencies_count > 0)
1268 HSA_DEBUG ("HSA does not allow kernel dispatching code with "
1269 "a depth bigger than one\n")
1270 goto failure;
1273 init_single_kernel (dependency, max_omp_data_size);
1276 return;
1278 failure:
1279 kernel->initialization_failed = true;
1282 /* Indent stream F by INDENT spaces. */
1284 static void
1285 indent_stream (FILE *f, unsigned indent)
1287 fprintf (f, "%*s", indent, "");
1290 /* Dump kernel DISPATCH data structure and indent it by INDENT spaces. */
1292 static void
1293 print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch, unsigned indent)
1295 indent_stream (stderr, indent);
1296 fprintf (stderr, "this: %p\n", dispatch);
1297 indent_stream (stderr, indent);
1298 fprintf (stderr, "queue: %p\n", dispatch->queue);
1299 indent_stream (stderr, indent);
1300 fprintf (stderr, "omp_data_memory: %p\n", dispatch->omp_data_memory);
1301 indent_stream (stderr, indent);
1302 fprintf (stderr, "kernarg_address: %p\n", dispatch->kernarg_address);
1303 indent_stream (stderr, indent);
1304 fprintf (stderr, "object: %lu\n", dispatch->object);
1305 indent_stream (stderr, indent);
1306 fprintf (stderr, "signal: %lu\n", dispatch->signal);
1307 indent_stream (stderr, indent);
1308 fprintf (stderr, "private_segment_size: %u\n",
1309 dispatch->private_segment_size);
1310 indent_stream (stderr, indent);
1311 fprintf (stderr, "group_segment_size: %u\n",
1312 dispatch->group_segment_size);
1313 indent_stream (stderr, indent);
1314 fprintf (stderr, "children dispatches: %lu\n",
1315 dispatch->kernel_dispatch_count);
1316 indent_stream (stderr, indent);
1317 fprintf (stderr, "omp_num_threads: %u\n",
1318 dispatch->omp_num_threads);
1319 fprintf (stderr, "\n");
1321 for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++)
1322 print_kernel_dispatch (dispatch->children_dispatches[i], indent + 2);
1325 /* Create kernel dispatch data structure for a KERNEL and all its
1326 dependencies. */
1328 static struct GOMP_hsa_kernel_dispatch *
1329 create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
1331 struct GOMP_hsa_kernel_dispatch *shadow
1332 = create_single_kernel_dispatch (kernel, omp_data_size);
1333 shadow->omp_num_threads = 64;
1334 shadow->debug = 0;
1335 shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0;
1337 /* Create kernel dispatch data structures. We do not allow to have
1338 a kernel dispatch with depth bigger than one. */
1339 for (unsigned i = 0; i < kernel->dependencies_count; i++)
1341 struct kernel_info *dependency
1342 = get_kernel_for_agent (kernel->agent, kernel->dependencies[i]);
1343 shadow->children_dispatches[i]
1344 = create_single_kernel_dispatch (dependency, omp_data_size);
1345 shadow->children_dispatches[i]->queue
1346 = kernel->agent->kernel_dispatch_command_q;
1347 shadow->children_dispatches[i]->omp_level = 1;
1350 return shadow;
1353 /* Do all the work that is necessary before running KERNEL for the first time.
1354 The function assumes the program has been created, finalized and frozen by
1355 create_and_finalize_hsa_program. */
1357 static void
1358 init_kernel (struct kernel_info *kernel)
1360 if (pthread_mutex_lock (&kernel->init_mutex))
1361 GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
1362 if (kernel->initialized)
1364 if (pthread_mutex_unlock (&kernel->init_mutex))
1365 GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1366 "mutex");
1368 return;
1371 /* Precomputed maximum size of OMP data necessary for a kernel from kernel
1372 dispatch operation. */
1373 init_single_kernel (kernel, &kernel->max_omp_data_size);
1375 if (!kernel->initialization_failed)
1376 HSA_DEBUG ("\n");
1378 kernel->initialized = true;
1379 if (pthread_mutex_unlock (&kernel->init_mutex))
1380 GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1381 "mutex");
1384 /* Parse the target attributes INPUT provided by the compiler and return true
1385 if we should run anything all. If INPUT is NULL, fill DEF with default
1386 values, then store INPUT or DEF into *RESULT. */
1388 static bool
1389 parse_target_attributes (void **input,
1390 struct GOMP_kernel_launch_attributes *def,
1391 struct GOMP_kernel_launch_attributes **result)
1393 if (!input)
1394 GOMP_PLUGIN_fatal ("No target arguments provided");
1396 bool attrs_found = false;
1397 while (*input)
1399 uintptr_t id = (uintptr_t) *input;
1400 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) == GOMP_DEVICE_HSA
1401 && ((id & GOMP_TARGET_ARG_ID_MASK)
1402 == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES))
1404 input++;
1405 attrs_found = true;
1406 break;
1409 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
1410 input++;
1411 input++;
1414 if (!attrs_found)
1416 def->ndim = 1;
1417 def->gdims[0] = 1;
1418 def->gdims[1] = 1;
1419 def->gdims[2] = 1;
1420 def->wdims[0] = 1;
1421 def->wdims[1] = 1;
1422 def->wdims[2] = 1;
1423 *result = def;
1424 HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n");
1425 return true;
1428 struct GOMP_kernel_launch_attributes *kla;
1429 kla = (struct GOMP_kernel_launch_attributes *) *input;
1430 *result = kla;
1431 if (kla->ndim == 0 || kla->ndim > 3)
1432 GOMP_PLUGIN_fatal ("Invalid number of dimensions (%u)", kla->ndim);
1434 HSA_DEBUG ("GOMP_OFFLOAD_run called with %u dimensions:\n", kla->ndim);
1435 unsigned i;
1436 for (i = 0; i < kla->ndim; i++)
1438 HSA_DEBUG (" Dimension %u: grid size %u and group size %u\n", i,
1439 kla->gdims[i], kla->wdims[i]);
1440 if (kla->gdims[i] == 0)
1441 return false;
1443 return true;
1446 /* Return the group size given the requested GROUP size, GRID size and number
1447 of grid dimensions NDIM. */
1449 static uint32_t
1450 get_group_size (uint32_t ndim, uint32_t grid, uint32_t group)
1452 if (group == 0)
1454 /* TODO: Provide a default via environment or device characteristics. */
1455 if (ndim == 1)
1456 group = 64;
1457 else if (ndim == 2)
1458 group = 8;
1459 else
1460 group = 4;
1463 if (group > grid)
1464 group = grid;
1465 return group;
1468 /* Return true if the HSA runtime can run function FN_PTR. */
1470 bool
1471 GOMP_OFFLOAD_can_run (void *fn_ptr)
1473 struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1474 struct agent_info *agent = kernel->agent;
1475 create_and_finalize_hsa_program (agent);
1477 if (agent->prog_finalized_error)
1478 goto failure;
1480 init_kernel (kernel);
1481 if (kernel->initialization_failed)
1482 goto failure;
1484 return true;
1486 failure:
1487 if (suppress_host_fallback)
1488 GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed");
1489 HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n");
1490 return false;
1493 /* Atomically store pair of uint16_t values (HEADER and REST) to a PACKET. */
1495 void
1496 packet_store_release (uint32_t* packet, uint16_t header, uint16_t rest)
1498 __atomic_store_n (packet, header | (rest << 16), __ATOMIC_RELEASE);
1501 /* Run KERNEL on its agent, pass VARS to it as arguments and take
1502 launchattributes from KLA. */
1504 void
1505 run_kernel (struct kernel_info *kernel, void *vars,
1506 struct GOMP_kernel_launch_attributes *kla)
1508 struct agent_info *agent = kernel->agent;
1509 if (pthread_rwlock_rdlock (&agent->modules_rwlock))
1510 GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
1512 if (!agent->initialized)
1513 GOMP_PLUGIN_fatal ("Agent must be initialized");
1515 if (!kernel->initialized)
1516 GOMP_PLUGIN_fatal ("Called kernel must be initialized");
1518 struct GOMP_hsa_kernel_dispatch *shadow
1519 = create_kernel_dispatch (kernel, kernel->max_omp_data_size);
1521 if (debug)
1523 fprintf (stderr, "\nKernel has following dependencies:\n");
1524 print_kernel_dispatch (shadow, 2);
1527 uint64_t index
1528 = hsa_fns.hsa_queue_add_write_index_release_fn (agent->command_q, 1);
1529 HSA_DEBUG ("Got AQL index %llu\n", (long long int) index);
1531 /* Wait until the queue is not full before writing the packet. */
1532 while (index - hsa_fns.hsa_queue_load_read_index_acquire_fn (agent->command_q)
1533 >= agent->command_q->size)
1536 hsa_kernel_dispatch_packet_t *packet;
1537 packet = ((hsa_kernel_dispatch_packet_t *) agent->command_q->base_address)
1538 + index % agent->command_q->size;
1540 memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4);
1541 packet->grid_size_x = kla->gdims[0];
1542 packet->workgroup_size_x = get_group_size (kla->ndim, kla->gdims[0],
1543 kla->wdims[0]);
1545 if (kla->ndim >= 2)
1547 packet->grid_size_y = kla->gdims[1];
1548 packet->workgroup_size_y = get_group_size (kla->ndim, kla->gdims[1],
1549 kla->wdims[1]);
1551 else
1553 packet->grid_size_y = 1;
1554 packet->workgroup_size_y = 1;
1557 if (kla->ndim == 3)
1559 packet->grid_size_z = kla->gdims[2];
1560 packet->workgroup_size_z = get_group_size (kla->ndim, kla->gdims[2],
1561 kla->wdims[2]);
1563 else
1565 packet->grid_size_z = 1;
1566 packet->workgroup_size_z = 1;
1569 packet->private_segment_size = kernel->private_segment_size;
1570 packet->group_segment_size = kernel->group_segment_size;
1571 packet->kernel_object = kernel->object;
1572 packet->kernarg_address = shadow->kernarg_address;
1573 hsa_signal_t s;
1574 s.handle = shadow->signal;
1575 packet->completion_signal = s;
1576 hsa_fns.hsa_signal_store_relaxed_fn (s, 1);
1577 memcpy (shadow->kernarg_address, &vars, sizeof (vars));
1579 /* PR hsa/70337. */
1580 size_t vars_size = sizeof (vars);
1581 if (kernel->kernarg_segment_size > vars_size)
1583 if (kernel->kernarg_segment_size != vars_size
1584 + sizeof (struct hsa_kernel_runtime *))
1585 GOMP_PLUGIN_fatal ("Kernel segment size has an unexpected value");
1586 memcpy (packet->kernarg_address + vars_size, &shadow,
1587 sizeof (struct hsa_kernel_runtime *));
1590 HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n");
1592 uint16_t header;
1593 header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
1594 header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
1595 header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
1597 HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name);
1599 packet_store_release ((uint32_t *) packet, header,
1600 (uint16_t) kla->ndim << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS);
1602 hsa_fns.hsa_signal_store_release_fn (agent->command_q->doorbell_signal,
1603 index);
1605 /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for
1606 signal wait and signal load operations on their own and we need to
1607 periodically call the hsa_signal_load_acquire on completion signals of
1608 children kernels in the CPU to make that happen. As soon the
1609 limitation will be resolved, this workaround can be removed. */
1611 HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
1613 /* Root signal waits with 1ms timeout. */
1614 while (hsa_fns.hsa_signal_wait_acquire_fn (s, HSA_SIGNAL_CONDITION_LT, 1,
1615 1000 * 1000,
1616 HSA_WAIT_STATE_BLOCKED) != 0)
1617 for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
1619 hsa_signal_t child_s;
1620 child_s.handle = shadow->children_dispatches[i]->signal;
1622 HSA_DEBUG ("Waiting for children completion signal: %lu\n",
1623 shadow->children_dispatches[i]->signal);
1624 hsa_fns.hsa_signal_load_acquire_fn (child_s);
1627 release_kernel_dispatch (shadow);
1629 if (pthread_rwlock_unlock (&agent->modules_rwlock))
1630 GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
1633 /* Part of the libgomp plugin interface. Run a kernel on device N (the number
1634 is actually ignored, we assume the FN_PTR has been mapped using the correct
1635 device) and pass it an array of pointers in VARS as a parameter. The kernel
1636 is identified by FN_PTR which must point to a kernel_info structure. */
1638 void
1639 GOMP_OFFLOAD_run (int n __attribute__((unused)),
1640 void *fn_ptr, void *vars, void **args)
1642 struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1643 struct GOMP_kernel_launch_attributes def;
1644 struct GOMP_kernel_launch_attributes *kla;
1645 if (!parse_target_attributes (args, &def, &kla))
1647 HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
1648 return;
1650 run_kernel (kernel, vars, kla);
1653 /* Information to be passed to a thread running a kernel asycnronously. */
1655 struct async_run_info
1657 int device;
1658 void *tgt_fn;
1659 void *tgt_vars;
1660 void **args;
1661 void *async_data;
1664 /* Thread routine to run a kernel asynchronously. */
1666 static void *
1667 run_kernel_asynchronously (void *thread_arg)
1669 struct async_run_info *info = (struct async_run_info *) thread_arg;
1670 int device = info->device;
1671 void *tgt_fn = info->tgt_fn;
1672 void *tgt_vars = info->tgt_vars;
1673 void **args = info->args;
1674 void *async_data = info->async_data;
1676 free (info);
1677 GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
1678 GOMP_PLUGIN_target_task_completion (async_data);
1679 return NULL;
1682 /* Part of the libgomp plugin interface. Run a kernel like GOMP_OFFLOAD_run
1683 does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
1684 has finished. */
1686 void
1687 GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
1688 void **args, void *async_data)
1690 pthread_t pt;
1691 struct async_run_info *info;
1692 HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n")
1693 info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
1695 info->device = device;
1696 info->tgt_fn = tgt_fn;
1697 info->tgt_vars = tgt_vars;
1698 info->args = args;
1699 info->async_data = async_data;
1701 int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info);
1702 if (err != 0)
1703 GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
1704 strerror (err));
1705 err = pthread_detach (pt);
1706 if (err != 0)
1707 GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
1708 "asynchronously: %s", strerror (err));
1711 /* Deinitialize all information associated with MODULE and kernels within
1712 it. Return TRUE on success. */
1714 static bool
1715 destroy_module (struct module_info *module)
1717 int i;
1718 for (i = 0; i < module->kernel_count; i++)
1719 if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
1721 GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization "
1722 "mutex");
1723 return false;
1725 return true;
1728 /* Part of the libgomp plugin interface. Unload BRIG module described by
1729 struct brig_image_desc in TARGET_DATA from agent number N. Return
1730 TRUE on success. */
1732 bool
1733 GOMP_OFFLOAD_unload_image (int n, unsigned version, const void *target_data)
1735 if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
1737 GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
1738 " (expected %u, received %u)",
1739 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
1740 return false;
1743 struct agent_info *agent;
1744 agent = get_agent_info (n);
1745 if (!agent)
1746 return false;
1748 if (pthread_rwlock_wrlock (&agent->modules_rwlock))
1750 GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
1751 return false;
1753 struct module_info *module = agent->first_module;
1754 while (module)
1756 if (module->image_desc == target_data)
1757 break;
1758 module = module->next;
1760 if (!module)
1762 GOMP_PLUGIN_error ("Attempt to unload an image that has never been "
1763 "loaded before");
1764 return false;
1767 remove_module_from_agent (agent, module);
1768 if (!destroy_module (module))
1769 return false;
1770 free (module);
1771 if (!destroy_hsa_program (agent))
1772 return false;
1773 if (pthread_rwlock_unlock (&agent->modules_rwlock))
1775 GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
1776 return false;
1778 return true;
1781 /* Part of the libgomp plugin interface. Deinitialize all information and
1782 status associated with agent number N. We do not attempt any
1783 synchronization, assuming the user and libgomp will not attempt
1784 deinitialization of a device that is in any way being used at the same
1785 time. Return TRUE on success. */
1787 bool
1788 GOMP_OFFLOAD_fini_device (int n)
1790 struct agent_info *agent = get_agent_info (n);
1791 if (!agent)
1792 return false;
1794 if (!agent->initialized)
1795 return true;
1797 struct module_info *next_module = agent->first_module;
1798 while (next_module)
1800 struct module_info *module = next_module;
1801 next_module = module->next;
1802 if (!destroy_module (module))
1803 return false;
1804 free (module);
1806 agent->first_module = NULL;
1807 if (!destroy_hsa_program (agent))
1808 return false;
1810 release_agent_shared_libraries (agent);
1812 hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (agent->command_q);
1813 if (status != HSA_STATUS_SUCCESS)
1814 return hsa_error ("Error destroying command queue", status);
1815 status = hsa_fns.hsa_queue_destroy_fn (agent->kernel_dispatch_command_q);
1816 if (status != HSA_STATUS_SUCCESS)
1817 return hsa_error ("Error destroying kernel dispatch command queue", status);
1818 if (pthread_mutex_destroy (&agent->prog_mutex))
1820 GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex");
1821 return false;
1823 if (pthread_rwlock_destroy (&agent->modules_rwlock))
1825 GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock");
1826 return false;
1828 agent->initialized = false;
1829 return true;
1832 /* Part of the libgomp plugin interface. Not implemented as it is not required
1833 for HSA. */
1835 void *
1836 GOMP_OFFLOAD_alloc (int ord, size_t size)
1838 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because "
1839 "it should never be called");
1840 return NULL;
1843 /* Part of the libgomp plugin interface. Not implemented as it is not required
1844 for HSA. */
1846 bool
1847 GOMP_OFFLOAD_free (int ord, void *ptr)
1849 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because "
1850 "it should never be called");
1851 return false;
1854 /* Part of the libgomp plugin interface. Not implemented as it is not required
1855 for HSA. */
1857 bool
1858 GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
1860 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
1861 "it should never be called");
1862 return false;
1865 /* Part of the libgomp plugin interface. Not implemented as it is not required
1866 for HSA. */
1868 bool
1869 GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
1871 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
1872 "it should never be called");
1873 return false;
1876 /* Part of the libgomp plugin interface. Not implemented as it is not required
1877 for HSA. */
1879 bool
1880 GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
1882 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
1883 "it should never be called");
1884 return false;